first commit

This commit is contained in:
2026-02-25 23:49:54 -05:00
commit 4d097161cb
1775 changed files with 452827 additions and 0 deletions

7
app_factory/__init__.py Normal file
View File

@@ -0,0 +1,7 @@
"""App Factory - Autonomous multi-agent orchestration framework."""
from app_factory.core.graph import AppFactoryOrchestrator
from app_factory.core.workspace import WorkspaceManager
from app_factory.core.observability import ObservabilityManager
__all__ = ["AppFactoryOrchestrator", "WorkspaceManager", "ObservabilityManager"]

View File

@@ -0,0 +1,8 @@
"""Agent modules for the App Factory orchestration framework."""
from app_factory.agents.pm_agent import PMAgent
from app_factory.agents.task_agent import TaskMasterAgent
from app_factory.agents.dev_agent import DevAgentManager
from app_factory.agents.qa_agent import QAAgent
__all__ = ["PMAgent", "TaskMasterAgent", "DevAgentManager", "QAAgent"]

View File

@@ -0,0 +1,205 @@
"""Dev Agent Manager - Spawns Dev Agents in Docker containers via Claude Code."""
import asyncio
import logging
import os
import re
import tempfile
from pathlib import Path
import pexpect
# Module-level logger for Dev Agent spawning/execution diagnostics.
logger = logging.getLogger(__name__)
# Template used to render each Dev Agent task prompt; lives in app_factory/prompts/.
PROMPT_TEMPLATE_PATH = Path(__file__).resolve().parent.parent / "prompts" / "dev_task_execution.txt"
class DevAgentManager:
    """Spawns Dev Agents in Docker containers, interfaces with Claude Code via pexpect."""

    def __init__(self, docker_client=None, max_retries: int = 3, timeout: int = 1800):
        """Initialize DevAgentManager.

        Args:
            docker_client: Docker client instance (or None to create from env).
            max_retries: Maximum Dev-QA bounce retries per task.
            timeout: Timeout in seconds for Claude Code execution (default 30 min).
        """
        if docker_client is not None:
            self.docker_client = docker_client
        else:
            # Imported lazily so callers that inject a client never need docker installed.
            import docker
            self.docker_client = docker.from_env()
        self.max_retries = max_retries
        self.timeout = timeout
        # Maps task_id -> number of attempts made so far (see execute_with_retry).
        self._retry_counts: dict[str, int] = {}

    def prepare_task_prompt(self, task: dict, global_arch: str = "") -> str:
        """Build a prompt string for the Dev Agent from the template.

        Args:
            task: Task dict with keys task_id, title, description, details, testStrategy.
            global_arch: Optional global architecture summary.

        Returns:
            Formatted prompt string.
        """
        template = PROMPT_TEMPLATE_PATH.read_text()
        return template.format(
            task_id=task.get("task_id", task.get("id", "")),
            title=task.get("title", ""),
            description=task.get("description", ""),
            details=task.get("details", ""),
            test_strategy=task.get("testStrategy", ""),
            global_architecture=global_arch or "No architecture context provided.",
        )

    async def execute_task(
        self,
        task: dict,
        container_id: str,
        worktree_path: str,
        global_arch: str = "",
    ) -> dict:
        """Execute a task inside a Docker container using Claude Code.

        NOTE: pexpect's spawn/expect calls are synchronous, so this blocks the
        event loop for the duration of the Claude Code run.

        Args:
            task: Task dict.
            container_id: Docker container ID to exec into.
            worktree_path: Host path to the worktree (mounted at /workspace).
            global_arch: Optional architecture context.

        Returns:
            Dict with status, output, files_changed, and exit_code.
        """
        prompt = self.prepare_task_prompt(task, global_arch)
        # Write prompt to temp file in worktree so it's visible inside the container
        prompt_file = os.path.join(worktree_path, ".task_prompt.txt")
        with open(prompt_file, "w") as f:
            f.write(prompt)
        cmd = f"docker exec {container_id} claude --print --prompt-file /workspace/.task_prompt.txt"
        try:
            child = pexpect.spawn(cmd, timeout=self.timeout, encoding="utf-8")
            child.expect(pexpect.EOF, timeout=self.timeout)
            output = child.before or ""
            child.close()
            exit_code = child.exitstatus if child.exitstatus is not None else -1
        except pexpect.TIMEOUT:
            # Best-effort kill of the hung child; it may already be gone.
            try:
                child.close(force=True)
            except Exception:
                pass
            return {
                "status": "failed",
                "output": "timeout",
                "files_changed": [],
                "exit_code": -1,
            }
        finally:
            # Clean up prompt file regardless of outcome.
            try:
                os.remove(prompt_file)
            except OSError:
                pass
        parsed = self.parse_claude_output(output)
        status = "success" if exit_code == 0 else "failed"
        return {
            "status": status,
            "output": output,
            "files_changed": parsed["files_changed"],
            "exit_code": exit_code,
        }

    def parse_claude_output(self, output: str) -> dict:
        """Parse Claude Code output to extract structured info.

        Args:
            output: Raw stdout from Claude Code.

        Returns:
            Dict with files_changed, test_results, and errors.
        """
        # Extract file paths (common patterns: Created/Modified/Updated path/to/file.py).
        # "Modif" gets its own alternation because its participles are "Modified" /
        # "Modifying" — the shared "(ed|ing)" suffix would only match "Modifiing".
        file_patterns = re.findall(
            r"(?:(?:(?:Creat|Updat|Edit|Writ)(?:ed|ing)|Modif(?:ied|ying))\s+)([^\s]+\.\w+)",
            output,
        )
        # Also catch paths that look like source files mentioned standalone
        standalone_paths = re.findall(
            r"(?:^|\s)([\w./]+\.(?:py|js|ts|yaml|yml|json|txt|md|toml|cfg))\b",
            output,
        )
        all_files = list(dict.fromkeys(file_patterns + standalone_paths))  # dedupe, preserve order
        # Extract test results
        test_results = {}
        passed_match = re.search(r"(\d+)\s+passed", output)
        failed_match = re.search(r"(\d+)\s+failed", output)
        if passed_match:
            test_results["passed"] = int(passed_match.group(1))
        if failed_match:
            test_results["failed"] = int(failed_match.group(1))
        # Extract error messages
        errors = re.findall(r"(?:Error|Exception|FAILED)[:\s]+(.*?)(?:\n|$)", output, re.IGNORECASE)
        return {
            "files_changed": all_files,
            "test_results": test_results,
            "errors": errors,
        }

    async def execute_with_retry(
        self,
        task: dict,
        container_id: str,
        worktree_path: str,
        global_arch: str = "",
    ) -> dict:
        """Execute a task with retry logic.

        Retries up to max_retries times on failure. If all retries are exhausted
        (including the degenerate max_retries <= 0 case, where no attempt is made),
        returns a result with status 'needs_clarification'.

        Args:
            task: Task dict.
            container_id: Docker container ID.
            worktree_path: Host worktree path.
            global_arch: Optional architecture context.

        Returns:
            Final execution result dict.
        """
        task_id = str(task.get("task_id", task.get("id", "")))
        # Initialize so the fall-through below is safe even when max_retries <= 0
        # (previously this raised NameError on `result`).
        result: dict = {}
        for attempt in range(self.max_retries):
            self._retry_counts[task_id] = attempt + 1
            result = await self.execute_task(task, container_id, worktree_path, global_arch)
            if result["status"] == "success":
                return result
        # All retries exhausted
        return {
            "status": "needs_clarification",
            "output": result.get("output", ""),
            "files_changed": result.get("files_changed", []),
            "exit_code": result.get("exit_code", -1),
        }

    def get_retry_count(self, task_id: str) -> int:
        """Return current retry count for a task."""
        return self._retry_counts.get(task_id, 0)

    def reset_retry_count(self, task_id: str):
        """Reset retry counter for a task (after clarification resolved)."""
        self._retry_counts.pop(task_id, None)

View File

@@ -0,0 +1,136 @@
"""Project Manager Agent - Expands user prompts into structured PRDs and handles clarification requests."""
import os
from datetime import datetime, timezone
from pathlib import Path
from app_factory.core.claude_client import ClaudeSDKClient
class PMAgent:
    """Agent responsible for PRD generation, clarification handling, and project planning."""

    def __init__(
        self,
        api_key: str = None,
        auth_token: str = None,
        model: str = "claude-opus-4-6",
        debug: bool = False,
        observability=None,
    ):
        """Initialize the PM Agent.

        Args:
            api_key: Anthropic API key; falls back to ANTHROPIC_API_KEY env var.
            auth_token: Auth token; falls back to ANTHROPIC_AUTH_TOKEN env var.
            model: Model identifier used for all PM completions.
            debug: Enable SDK debug logging.
            observability: Optional observability manager for token accounting.
        """
        self.model = model
        # Cumulative token counters across all completions (see get_token_usage).
        self.input_tokens = 0
        self.output_tokens = 0
        self._prompts_dir = Path(__file__).resolve().parent.parent / "prompts"
        self.observability = observability
        resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        resolved_auth = auth_token or os.environ.get("ANTHROPIC_AUTH_TOKEN")
        self.client = ClaudeSDKClient(
            api_key=resolved_key,
            auth_token=resolved_auth,
            enable_debug=debug,
        )

    def _load_template(self, template_name: str) -> str:
        """Load a prompt template file from app_factory/prompts/."""
        path = self._prompts_dir / template_name
        return path.read_text()

    def _record_usage(self, task_id: str, response) -> None:
        """Accumulate token counts from a completion and log them to observability.

        Extracted so expand_prompt_to_prd and handle_clarification_request share
        one accounting path instead of duplicating it.

        Args:
            task_id: Label for the observability log entry.
            response: Completion response exposing input_tokens/output_tokens.
        """
        self.input_tokens += response.input_tokens
        self.output_tokens += response.output_tokens
        if self.observability:
            self.observability.log_token_usage(
                "pm_agent",
                task_id,
                input_tokens=response.input_tokens,
                output_tokens=response.output_tokens,
                model=self.model,
            )

    async def expand_prompt_to_prd(self, user_input: str) -> str:
        """Expand a user prompt into a structured PRD using Claude.

        Returns markdown with sections: Objective, Core Requirements,
        Technical Architecture, Tech Stack, Success Criteria, Non-Functional Requirements.
        """
        system_prompt = self._load_template("pm_prd_expansion.txt")
        response = await self.client.complete(
            prompt=user_input,
            model=self.model,
            system_prompt=system_prompt,
            max_turns=100,
            observability=self.observability,
            agent_name="pm_agent",
            task_id="expand_prd",
        )
        self._record_usage("expand_prd", response)
        return response.text

    async def handle_clarification_request(self, clarification: dict) -> str:
        """Handle a clarification request from a downstream agent.

        Args:
            clarification: dict with keys requesting_agent, task_id, question, context.

        Returns:
            Clarification response string. If the question requires human input,
            prompts the user and returns their answer.
        """
        template = self._load_template("pm_clarification.txt")
        prompt = template.format(
            requesting_agent=clarification.get("requesting_agent", "unknown"),
            task_id=clarification.get("task_id", "N/A"),
            question=clarification.get("question", ""),
            context=clarification.get("context", ""),
        )
        response = await self.client.complete(
            prompt=prompt,
            model=self.model,
            max_turns=100,
            observability=self.observability,
            agent_name="pm_agent",
            task_id=f"clarification:{clarification.get('task_id', 'N/A')}",
        )
        self._record_usage(f"clarification:{clarification.get('task_id', 'N/A')}", response)
        answer = response.text.strip()
        # Sentinel emitted by the model when it cannot answer autonomously.
        # NOTE: input() blocks the event loop; acceptable here because human
        # escalation is inherently interactive.
        if "ESCALATE_TO_HUMAN" in answer:
            human_answer = input(
                f"[PMAgent] Clarification needed for {clarification.get('requesting_agent', 'agent')} "
                f"(task {clarification.get('task_id', 'N/A')}): "
                f"{clarification.get('question', '')}\n> "
            )
            return human_answer
        return answer

    def update_prd(self, prd_path: str, updates: str):
        """Append updates to an existing PRD file with a versioned header."""
        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        header = f"\n\n---\n## PRD Update - {timestamp}\n\n"
        with open(prd_path, "a") as f:
            f.write(header)
            f.write(updates)

    def get_token_usage(self) -> dict:
        """Return cumulative token usage."""
        return {
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "total_tokens": self.input_tokens + self.output_tokens,
        }

View File

@@ -0,0 +1,383 @@
"""QA Agent - Handles code review, testing, linting, and merge operations."""
import os
import re
import subprocess
from pathlib import Path
import git
from app_factory.core.claude_client import ClaudeSDKClient
class QAAgent:
    """Reviews code, runs tests, handles merge conflicts, merges worktrees to main."""

    def __init__(
        self,
        repo_path: str,
        api_key: str = None,
        auth_token: str = None,
        max_retries: int = 3,
        debug: bool = False,
        observability=None,
    ):
        """Initialize QAAgent.

        Args:
            repo_path: Path to the git repository.
            api_key: Optional API key. Falls back to ANTHROPIC_API_KEY env var.
            auth_token: Optional auth token. Falls back to ANTHROPIC_AUTH_TOKEN env var.
            max_retries: Maximum QA-Dev bounce retries per task.
            debug: Enable SDK debug logging.
            observability: Optional observability manager for token accounting.
        """
        self.repo = git.Repo(repo_path)
        self.repo_path = Path(repo_path).resolve()
        self.max_retries = max_retries
        # task_id -> number of failed QA passes so far.
        self._retry_counts: dict[str, int] = {}
        self._prompts_dir = Path(__file__).resolve().parent.parent / "prompts"
        self.observability = observability
        resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        resolved_auth = auth_token or os.environ.get("ANTHROPIC_AUTH_TOKEN")
        self.client = ClaudeSDKClient(
            api_key=resolved_key,
            auth_token=resolved_auth,
            enable_debug=debug,
        )

    async def review_and_merge(self, task_id: str, worktree_path: str, task: dict = None) -> dict:
        """Full QA pipeline: rebase, lint, test, review, merge.

        Returns:
            dict with status and details. Status is one of:
            'merged', 'rebase_failed', 'lint_failed', 'tests_failed',
            'review_failed', 'merge_failed'.
        """
        # 1. Rebase feature branch onto main
        rebase_result = await self.rebase_onto_main(worktree_path, task_id)
        if not rebase_result["success"]:
            self._increment_retry(task_id)
            return {
                "status": "rebase_failed",
                "conflicts": rebase_result.get("conflicts", []),
                "retry_count": self.get_retry_count(task_id),
            }
        # 2. Run linting
        lint_result = self.run_linter(worktree_path)
        if not lint_result["passed"]:
            self._increment_retry(task_id)
            return {
                "status": "lint_failed",
                "errors": lint_result["errors"],
                "warnings": lint_result["warnings"],
                "retry_count": self.get_retry_count(task_id),
            }
        # 3. Run tests
        test_result = self.run_tests(worktree_path)
        if not test_result["passed"]:
            self._increment_retry(task_id)
            return {
                "status": "tests_failed",
                "total": test_result["total"],
                "failures": test_result["failures"],
                "errors": test_result["errors"],
                "output": test_result["output"],
                "retry_count": self.get_retry_count(task_id),
            }
        # 4. Code review via Claude
        wt_repo = git.Repo(worktree_path)
        diff = wt_repo.git.diff("main", "--", ".")
        review_result = await self.code_review(diff, task=task)
        if not review_result["approved"]:
            self._increment_retry(task_id)
            return {
                "status": "review_failed",
                "issues": review_result["issues"],
                "summary": review_result["summary"],
                "retry_count": self.get_retry_count(task_id),
            }
        # 5. Merge to main
        merge_result = self.merge_to_main(worktree_path, task_id)
        if not merge_result["success"]:
            return {
                "status": "merge_failed",
                "error": merge_result.get("error", "Unknown merge error"),
            }
        return {
            "status": "merged",
            "commit_sha": merge_result["commit_sha"],
            "review_summary": review_result["summary"],
        }

    async def rebase_onto_main(self, worktree_path: str, task_id: str) -> dict:
        """Rebase the feature branch in the worktree onto main.

        Returns:
            dict with success bool and conflicts list.
        """
        wt_repo = git.Repo(worktree_path)
        try:
            wt_repo.git.fetch("origin", "main")
        except git.GitCommandError:
            pass  # fetch may fail in local-only repos; continue with local main
        try:
            wt_repo.git.rebase("main")
            return {"success": True, "conflicts": []}
        except git.GitCommandError:
            # Rebase failed — check for conflicts
            conflicts = self._get_conflict_files(wt_repo)
            if conflicts and self.auto_resolve_conflicts(worktree_path):
                return {"success": True, "conflicts": []}
            # Abort the failed rebase so the worktree is left in a clean state
            try:
                wt_repo.git.rebase("--abort")
            except git.GitCommandError:
                pass
            return {"success": False, "conflicts": conflicts}

    def run_linter(self, worktree_path: str) -> dict:
        """Run ruff linter on the worktree.

        Returns:
            dict with passed bool, errors list, and warnings list.
        """
        try:
            result = subprocess.run(
                ["ruff", "check", "."],
                cwd=worktree_path,
                capture_output=True,
                text=True,
                timeout=120,
            )
        except FileNotFoundError:
            # Linting is best-effort: a missing ruff binary should not block QA.
            return {"passed": True, "errors": [], "warnings": ["ruff not found, skipping lint"]}
        except subprocess.TimeoutExpired:
            return {"passed": False, "errors": ["Linter timed out"], "warnings": []}
        errors = []
        warnings = []
        for line in result.stdout.splitlines():
            line = line.strip()
            if not line or line.startswith("Found") or line.startswith("All checks"):
                continue
            # ruff output lines contain error codes like E501, W291, etc.
            if re.search(r"\b[A-Z]\d{3,4}\b", line):
                errors.append(line)
            elif line:
                warnings.append(line)
        passed = result.returncode == 0
        return {"passed": passed, "errors": errors, "warnings": warnings}

    def run_tests(self, worktree_path: str) -> dict:
        """Run pytest in the worktree.

        Returns:
            dict with passed bool, total/failures/errors counts, and raw output.
        """
        try:
            result = subprocess.run(
                ["python", "-m", "pytest", "-v", "--tb=short"],
                cwd=worktree_path,
                capture_output=True,
                text=True,
                timeout=300,
            )
        except FileNotFoundError:
            return {"passed": False, "total": 0, "failures": 0, "errors": 1,
                    "output": "pytest not found"}
        except subprocess.TimeoutExpired:
            return {"passed": False, "total": 0, "failures": 0, "errors": 1,
                    "output": "Test execution timed out"}
        output = result.stdout + result.stderr
        parsed = self.parse_test_results(output)
        parsed["output"] = output
        return parsed

    async def code_review(self, diff: str, task: dict = None) -> dict:
        """Review a diff using Claude for quality and security issues.

        Returns:
            dict with approved bool, issues list, and summary string.
        """
        template = self._load_template("qa_review.txt")
        task_context = ""
        if task:
            task_context = (
                f"Task ID: {task.get('id', 'N/A')}\n"
                f"Title: {task.get('title', 'N/A')}\n"
                f"Description: {task.get('description', 'N/A')}"
            )
        prompt = template.format(task_context=task_context, diff=diff)
        response = await self.client.complete(
            prompt=prompt,
            model="claude-sonnet-4-6",
            max_turns=100,
            observability=self.observability,
            agent_name="qa_agent",
            task_id=str(task.get("id", task.get("task_id", "review"))) if task else "review",
        )
        if self.observability:
            self.observability.log_token_usage(
                "qa_agent",
                str(task.get("id", task.get("task_id", "review"))) if task else "review",
                input_tokens=response.input_tokens,
                output_tokens=response.output_tokens,
                model="claude-sonnet-4-6",
            )
        text = response.text
        return self._parse_review_response(text)

    def merge_to_main(self, worktree_path: str, task_id: str) -> dict:
        """Merge the feature branch into main with --no-ff.

        Returns:
            dict with success bool and commit_sha.
        """
        branch_name = f"feature/task-{task_id}"
        try:
            self.repo.git.checkout("main")
            self.repo.git.merge("--no-ff", branch_name, m=f"Merge {branch_name}")
            commit_sha = self.repo.head.commit.hexsha
            return {"success": True, "commit_sha": commit_sha}
        except git.GitCommandError as e:
            return {"success": False, "commit_sha": None, "error": str(e)}

    def auto_resolve_conflicts(self, worktree_path: str) -> bool:
        """Try to auto-resolve simple merge conflicts.

        Returns True if all conflicts were resolved.
        """
        wt_repo = git.Repo(worktree_path)
        unmerged = wt_repo.index.unmerged_blobs()
        if not unmerged:
            return True
        for path in unmerged:
            file_path = os.path.join(worktree_path, path)
            if not os.path.exists(file_path):
                continue
            try:
                with open(file_path) as f:
                    content = f.read()
                # Accept "theirs" (incoming) for simple conflicts
                if "<<<<<<< " in content and "=======" in content and ">>>>>>> " in content:
                    resolved = re.sub(
                        r"<<<<<<< [^\n]*\n.*?=======\n(.*?)>>>>>>> [^\n]*\n",
                        r"\1",
                        content,
                        flags=re.DOTALL,
                    )
                    with open(file_path, "w") as f:
                        f.write(resolved)
                    wt_repo.index.add([path])
                else:
                    return False
            except Exception:
                return False
        try:
            wt_repo.git.rebase("--continue")
            return True
        except git.GitCommandError:
            return False

    def parse_test_results(self, output: str) -> dict:
        """Parse pytest output into structured results.

        pytest emits several "=== ... ===" banner lines (the "test session
        starts" header first, the count summary like "5 passed, 2 failed" last).
        We scan ALL banners and keep the last one carrying counts — using a
        single re.search would match the header and always report zero tests.

        Returns:
            dict with passed bool, total int, failures int, errors int.
        """
        passed_count = 0
        failed_count = 0
        error_count = 0
        for banner in re.findall(r"=+\s*(.*?)\s*=+\s*$", output, re.MULTILINE):
            p = re.search(r"(\d+)\s+passed", banner)
            f = re.search(r"(\d+)\s+failed", banner)
            e = re.search(r"(\d+)\s+error", banner)
            if p or f or e:
                # Later banners win: the final summary line is authoritative.
                passed_count = int(p.group(1)) if p else 0
                failed_count = int(f.group(1)) if f else 0
                error_count = int(e.group(1)) if e else 0
        total = passed_count + failed_count + error_count
        # total > 0 guards against declaring success when no tests ran at all.
        all_passed = failed_count == 0 and error_count == 0 and total > 0
        return {
            "passed": all_passed,
            "total": total,
            "failures": failed_count,
            "errors": error_count,
        }

    def get_retry_count(self, task_id: str) -> int:
        """Return QA retry count for a task."""
        return self._retry_counts.get(task_id, 0)

    def _increment_retry(self, task_id: str):
        """Increment the retry counter for a task."""
        self._retry_counts[task_id] = self._retry_counts.get(task_id, 0) + 1

    def _load_template(self, template_name: str) -> str:
        """Load a prompt template file from app_factory/prompts/."""
        path = self._prompts_dir / template_name
        return path.read_text()

    def _get_conflict_files(self, repo: git.Repo) -> list[str]:
        """Get list of conflicting files from a repo."""
        try:
            status_output = repo.git.status("--porcelain")
            conflicts = []
            for line in status_output.splitlines():
                # UU = both modified, AA = both added (porcelain conflict markers).
                if line.startswith("UU ") or line.startswith("AA "):
                    conflicts.append(line[3:].strip())
            return conflicts
        except git.GitCommandError:
            return []

    def _parse_review_response(self, text: str) -> dict:
        """Parse Claude's review response into structured data.

        Expected line formats: "APPROVED: true|false",
        "- [severity: critical|warning|info] description", "SUMMARY: ...".
        """
        approved = False
        issues = []
        summary = ""
        for line in text.splitlines():
            line = line.strip()
            if line.upper().startswith("APPROVED:"):
                value = line.split(":", 1)[1].strip().lower()
                approved = value in ("true", "yes")
            elif line.startswith("- ["):
                # Parse issue lines like "- [severity: critical] description"
                issue_match = re.match(
                    r"-\s*\[severity:\s*(critical|warning|info)\]\s*(.*)",
                    line,
                    re.IGNORECASE,
                )
                if issue_match:
                    issues.append({
                        "severity": issue_match.group(1).lower(),
                        "description": issue_match.group(2).strip(),
                    })
            elif line.upper().startswith("SUMMARY:"):
                summary = line.split(":", 1)[1].strip()
        return {"approved": approved, "issues": issues, "summary": summary}

View File

@@ -0,0 +1,180 @@
"""Task Master Agent - Bridge to claude-task-master for task graph management."""
import asyncio
import json
import logging
import os
import subprocess
from pathlib import Path
# Module-level logger for task-master bridge diagnostics (retries, CLI calls).
logger = logging.getLogger(__name__)
class TaskMasterAgent:
"""Bridge to claude-task-master for task graph management and dependency resolution."""
def __init__(self, project_root: str, mcp_client=None):
self.project_root = str(project_root)
self.mcp_client = mcp_client
self.max_retries = 3
self.base_delay = 1.0
async def parse_prd(self, prd_content: str, num_tasks: int = 10) -> dict:
"""Write PRD content to disk and invoke task-master parse-prd."""
docs_dir = Path(self.project_root) / ".taskmaster" / "docs"
docs_dir.mkdir(parents=True, exist_ok=True)
prd_path = docs_dir / "prd.md"
prd_path.write_text(prd_content)
result = await self._call_with_retry(
self._run_cli,
"parse-prd",
str(prd_path),
"--num-tasks",
str(num_tasks),
"--force",
)
return result
async def get_unblocked_tasks(self) -> list:
"""Get all pending tasks whose dependencies are all done."""
result = await self._call_with_retry(self._run_cli, "list", "--json")
tasks = result.get("tasks", [])
done_ids = {
str(t["id"]) for t in tasks if t.get("status") == "done"
}
unblocked = []
for task in tasks:
if task.get("status") != "pending":
continue
deps = [str(d) for d in task.get("dependencies", [])]
if all(d in done_ids for d in deps):
unblocked.append(task)
return unblocked
async def update_task_status(
self, task_id: str, status: str, notes: str = ""
):
"""Update a task's status and optionally add implementation notes."""
await self._call_with_retry(
self._run_cli,
"set-status",
f"--id={task_id}",
f"--status={status}",
)
if notes:
await self._call_with_retry(
self._run_cli,
"update-subtask",
f"--id={task_id}",
f"--prompt={notes}",
)
async def get_task_details(self, task_id: str) -> dict:
"""Get full details for a specific task."""
result = await self._call_with_retry(
self._run_cli, "show", str(task_id), "--json"
)
task = result.get("task", result)
return {
"id": task.get("id"),
"title": task.get("title", ""),
"description": task.get("description", ""),
"details": task.get("details", ""),
"testStrategy": task.get("testStrategy", ""),
"dependencies": task.get("dependencies", []),
"subtasks": task.get("subtasks", []),
"status": task.get("status", "pending"),
"priority": task.get("priority", ""),
}
async def get_next_task(self) -> dict | None:
"""Get the highest-priority unblocked task, or None."""
try:
result = await self._call_with_retry(
self._run_cli, "next", "--json"
)
task = result.get("task", result)
if task and task.get("id"):
return task
except RuntimeError:
logger.debug("next_task command failed, falling back to manual selection")
unblocked = await self.get_unblocked_tasks()
if not unblocked:
return None
priority_order = {"high": 0, "medium": 1, "low": 2}
unblocked.sort(
key=lambda t: (
priority_order.get(t.get("priority", "medium"), 1),
t.get("id", 0),
)
)
return unblocked[0]
async def expand_task(self, task_id: str, num_subtasks: int = 5) -> dict:
"""Break a task into subtasks."""
result = await self._call_with_retry(
self._run_cli,
"expand",
f"--id={task_id}",
f"--num={num_subtasks}",
"--force",
)
return result
async def _call_with_retry(self, func, *args, **kwargs):
"""Retry with exponential backoff."""
last_exc = None
for attempt in range(self.max_retries):
try:
return await func(*args, **kwargs)
except Exception as exc:
last_exc = exc
if attempt < self.max_retries - 1:
delay = self.base_delay * (2 ** attempt)
logger.warning(
"Attempt %d/%d failed: %s. Retrying in %.1fs",
attempt + 1,
self.max_retries,
exc,
delay,
)
await asyncio.sleep(delay)
raise RuntimeError(
f"All {self.max_retries} attempts failed. Last error: {last_exc}"
) from last_exc
async def _run_cli(self, *args: str) -> dict:
"""Execute a task-master CLI command and return parsed JSON output."""
cmd = ["task-master", *args]
logger.debug("Running CLI: %s", " ".join(cmd))
proc = await asyncio.get_event_loop().run_in_executor(
None,
lambda: subprocess.run(
cmd,
capture_output=True,
text=True,
cwd=self.project_root,
timeout=120,
),
)
if proc.returncode != 0:
raise RuntimeError(
f"task-master {args[0]} failed (rc={proc.returncode}): {proc.stderr.strip()}"
)
stdout = proc.stdout.strip()
if not stdout:
return {}
try:
return json.loads(stdout)
except json.JSONDecodeError:
return {"raw_output": stdout}

View File

@@ -0,0 +1,8 @@
"""Core modules for the App Factory orchestration framework."""
from app_factory.core.graph import AppFactoryOrchestrator, AppFactoryState
from app_factory.core.workspace import WorkspaceManager
from app_factory.core.observability import ObservabilityManager
from app_factory.core.architecture_tracker import ArchitectureTracker
__all__ = ["AppFactoryOrchestrator", "AppFactoryState", "WorkspaceManager", "ObservabilityManager", "ArchitectureTracker"]

View File

@@ -0,0 +1,300 @@
"""Architecture Tracker - Tracks global architecture to prevent context starvation and code duplication."""
import ast
import json
import logging
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
from app_factory.core.claude_client import ClaudeSDKClient
# Module-level logger for architecture-tracking diagnostics (load/extract failures).
logger = logging.getLogger(__name__)
class ArchitectureTracker:
"""Tracks global architecture to prevent Dev Agent context starvation and code duplication."""
def __init__(
self,
data_dir: str = "app_factory/data",
api_key: str = None,
auth_token: str = None,
debug: bool = False,
observability=None,
):
"""Initialize ArchitectureTracker.
Args:
data_dir: Directory for storing global_architecture.json.
api_key: Optional API key for AI-powered summarization.
"""
self.data_dir = Path(data_dir)
self.data_dir.mkdir(parents=True, exist_ok=True)
self._arch_path = self.data_dir / "global_architecture.json"
self.observability = observability
self._client = None
resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
resolved_auth = auth_token or os.environ.get("ANTHROPIC_AUTH_TOKEN")
if resolved_key or resolved_auth:
try:
self._client = ClaudeSDKClient(
api_key=resolved_key,
auth_token=resolved_auth,
enable_debug=debug,
)
except Exception as exc:
logger.warning("Claude SDK unavailable (%s). AI summarization disabled.", exc)
self._architecture = self.load_architecture()
def _default_architecture(self) -> dict:
"""Return default architecture schema."""
return {
"modules": [],
"utilities": [],
"design_patterns": [],
"naming_conventions": {
"variables": "snake_case",
"classes": "PascalCase",
"functions": "snake_case",
"constants": "UPPER_SNAKE_CASE",
},
"tech_stack": {
"language": "Python",
"framework": "LangGraph",
},
"version": 1,
"last_updated": datetime.now(timezone.utc).isoformat(),
}
def load_architecture(self) -> dict:
"""Load from global_architecture.json or return default."""
if self._arch_path.exists():
try:
with open(self._arch_path, "r") as f:
return json.load(f)
except (json.JSONDecodeError, OSError) as exc:
logger.warning("Failed to load architecture file (%s). Using default.", exc)
return self._default_architecture()
def save_architecture(self, data: dict):
"""Save to global_architecture.json with timestamp update."""
data["last_updated"] = datetime.now(timezone.utc).isoformat()
with open(self._arch_path, "w") as f:
json.dump(data, f, indent=2)
self._architecture = data
async def update_architecture(self, completed_task: dict, files_changed: list):
"""Update architecture based on completed task and changed files.
Args:
completed_task: Dict with task info (e.g. title, description).
files_changed: List of file paths that were modified.
"""
new_modules = []
new_utilities = []
for file_path in files_changed:
if not os.path.exists(file_path) or not file_path.endswith(".py"):
continue
try:
with open(file_path, "r") as f:
source = f.read()
except OSError:
continue
if self._client:
await self._ai_extract(source, file_path, new_modules, new_utilities)
else:
self._basic_extract(source, file_path, new_modules, new_utilities)
existing_module_names = {m["name"] for m in self._architecture["modules"]}
for mod in new_modules:
if mod["name"] not in existing_module_names:
self._architecture["modules"].append(mod)
existing_module_names.add(mod["name"])
existing_utility_names = {u["name"] for u in self._architecture["utilities"]}
for util in new_utilities:
if util["name"] not in existing_utility_names:
self._architecture["utilities"].append(util)
existing_utility_names.add(util["name"])
self.save_architecture(self._architecture)
async def _ai_extract(
self, source: str, file_path: str, modules: list, utilities: list
):
"""Use Claude to extract architecture info from source code."""
prompt = (
"Analyze this Python source file and extract:\n"
"1. Module-level classes (name, purpose)\n"
"2. Utility functions (name, description)\n"
"Respond ONLY with valid JSON: "
'{"classes": [{"name": "...", "purpose": "..."}], '
'"functions": [{"name": "...", "description": "..."}]}\n\n'
f"File: {file_path}\n```python\n{source[:4000]}\n```"
)
try:
response = await self._client.complete(
prompt=prompt,
model="claude-sonnet-4-6",
max_turns=100,
observability=self.observability,
agent_name="architecture_tracker",
task_id=f"ai_extract:{Path(file_path).name}",
)
if self.observability:
self.observability.log_token_usage(
"architecture_tracker",
f"ai_extract:{Path(file_path).name}",
input_tokens=response.input_tokens,
output_tokens=response.output_tokens,
model="claude-sonnet-4-6",
)
text = response.text
# Extract JSON from response
start = text.find("{")
end = text.rfind("}") + 1
if start >= 0 and end > start:
data = json.loads(text[start:end])
for cls in data.get("classes", []):
modules.append({
"name": cls["name"],
"purpose": cls.get("purpose", ""),
"file_path": file_path,
})
for func in data.get("functions", []):
utilities.append({
"name": func["name"],
"description": func.get("description", ""),
"file_path": file_path,
})
except Exception as exc:
logger.warning("AI extraction failed (%s). Falling back to basic.", exc)
self._basic_extract(source, file_path, modules, utilities)
def _basic_extract(
self, source: str, file_path: str, modules: list, utilities: list
):
"""Extract architecture info using AST parsing."""
try:
tree = ast.parse(source)
except SyntaxError:
return
for node in ast.iter_child_nodes(tree):
if isinstance(node, ast.ClassDef):
docstring = ast.get_docstring(node) or ""
modules.append({
"name": node.name,
"purpose": docstring.split("\n")[0] if docstring else "",
"file_path": file_path,
})
elif isinstance(node, ast.FunctionDef) or isinstance(node, ast.AsyncFunctionDef):
if node.name.startswith("_"):
continue
docstring = ast.get_docstring(node) or ""
utilities.append({
"name": node.name,
"description": docstring.split("\n")[0] if docstring else "",
"file_path": file_path,
})
def get_architecture_summary(self, max_tokens: int = 2000) -> str:
    """Generate concise text summary from architecture data.

    Args:
        max_tokens: Approximate max tokens for the summary (~4 chars per token).

    Returns:
        Formatted string for injection into Dev Agent prompts.
    """
    char_budget = max_tokens * 4
    arch = self._architecture
    lines = ["## Project Architecture Summary", ""]

    stack = arch.get("tech_stack", {})
    if stack:
        lines.append("### Tech Stack")
        lines.extend(f"- {key}: {value}" for key, value in stack.items())
        lines.append("")

    module_entries = arch.get("modules", [])
    if module_entries:
        lines.append("### Modules")
        lines.extend(
            f"- **{entry['name']}** ({entry.get('file_path', '')}): {entry.get('purpose', '')}"
            for entry in module_entries
        )
        lines.append("")

    utility_entries = arch.get("utilities", [])
    if utility_entries:
        lines.append("### Shared Utilities")
        lines.extend(
            f"- **{entry['name']}** ({entry.get('file_path', '')}): {entry.get('description', '')}"
            for entry in utility_entries
        )
        lines.append("")

    pattern_entries = arch.get("design_patterns", [])
    if pattern_entries:
        lines.append("### Design Patterns")
        lines.extend(
            f"- {entry.get('pattern', '')}: {entry.get('usage', '')}"
            for entry in pattern_entries
        )
        lines.append("")

    conventions = arch.get("naming_conventions", {})
    if conventions:
        lines.append("### Naming Conventions")
        lines.extend(f"- {key}: {value}" for key, value in conventions.items())
        lines.append("")

    text = "\n".join(lines)
    # Hard-clip to the character budget with an ellipsis marker.
    if len(text) > char_budget:
        return text[:char_budget - 3] + "..."
    return text
def add_module(self, name: str, purpose: str, file_path: str):
    """Manually add a module to the architecture.

    Args:
        name: Module/class name.
        purpose: Brief description of what it does.
        file_path: Path to the source file.
    """
    entry = {"name": name, "purpose": purpose, "file_path": file_path}
    self._architecture["modules"].append(entry)
    # Persist immediately so manual additions survive the process.
    self.save_architecture(self._architecture)
def add_utility(self, name: str, description: str, file_path: str):
    """Manually add a utility function to the architecture.

    Args:
        name: Function name.
        description: Brief description of what it does.
        file_path: Path to the source file.
    """
    entry = {"name": name, "description": description, "file_path": file_path}
    self._architecture["utilities"].append(entry)
    # Persist immediately so manual additions survive the process.
    self.save_architecture(self._architecture)

View File

@@ -0,0 +1,721 @@
"""Shared Claude Agent SDK client wrapper."""
from __future__ import annotations
import asyncio
import logging
import os
import shutil
import tempfile
from dataclasses import dataclass
from importlib import import_module
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
@dataclass
class ClaudeCompletion:
    """Normalized completion result from Claude Agent SDK."""

    # Final text: the SDK result message, or concatenated assistant text blocks.
    text: str
    # Token counts extracted best-effort from the SDK usage payload; 0 when unknown.
    input_tokens: int = 0
    output_tokens: int = 0
    # Raw usage dict exactly as reported by the SDK, for downstream inspection.
    raw_usage: dict[str, Any] | None = None
def _load_sdk() -> tuple[Any, Any]:
"""Load Claude Agent SDK symbols."""
try:
mod = import_module("claude_agent_sdk")
return mod.query, mod.ClaudeAgentOptions
except Exception as exc:
raise ImportError(
"Claude Agent SDK is not installed. Install 'claude-agent-sdk'."
) from exc
class ClaudeSDKClient:
    """Small adapter over Claude Agent SDK query() streaming API."""

    # Cumulative time marks (seconds from the first failure) that pace
    # rate-limit retries; the per-attempt delay is the difference between
    # consecutive marks.
    _RATE_LIMIT_RETRY_TIME_MARKS_SECONDS: tuple[float, ...] = (0.2, 1.0, 5.0)
    # Substrings that mark a payload key as sensitive; matching values are
    # replaced with "[REDACTED]" before being logged.
    _SENSITIVE_KEY_TOKENS: tuple[str, ...] = (
        "api_key",
        "apikey",
        "auth",
        "token",
        "secret",
        "password",
        "authorization",
        "cookie",
    )

    def __init__(
        self,
        api_key: str | None = None,
        auth_token: str | None = None,
        enable_debug: bool = False,
    ):
        """Initialize the client.

        Args:
            api_key: Optional Anthropic API key, exported as ANTHROPIC_API_KEY.
            auth_token: Optional auth token, exported as ANTHROPIC_AUTH_TOKEN.
            enable_debug: When True, capture SDK debug stderr per attempt.
        """
        # SDK symbols are resolved lazily on first use (_ensure_sdk_loaded).
        self._query: Any | None = None
        self._options_cls: Any | None = None
        # Environment overrides merged into every SDK invocation.
        self._env: dict[str, str] = {}
        self._enable_debug = enable_debug
        if api_key:
            self._env["ANTHROPIC_API_KEY"] = api_key
        if auth_token:
            self._env["ANTHROPIC_AUTH_TOKEN"] = auth_token
async def complete(
    self,
    prompt: str,
    *,
    model: str | None = None,
    system_prompt: str | None = None,
    max_turns: int = 100,
    cwd: str | None = None,
    env: dict[str, str] | None = None,
    observability: Any | None = None,
    agent_name: str = "claude_sdk",
    task_id: str = "-",
) -> ClaudeCompletion:
    """Run a single-turn completion and normalize text/token usage.

    Streams SDK messages, collecting assistant text blocks and the final
    result message, with rate-limit retries paced by
    _RATE_LIMIT_RETRY_TIME_MARKS_SECONDS.

    Args:
        prompt: Prompt passed to the SDK query.
        model: Optional model override.
        system_prompt: Optional system prompt.
        max_turns: Maximum agent turns for the query.
        cwd: Working directory for the SDK execution.
        env: Extra environment merged over the client defaults.
        observability: Optional sink with a log_claude_event method.
        agent_name: Label attached to observability events.
        task_id: Task identifier attached to observability events.

    Returns:
        ClaudeCompletion with normalized text and best-effort token counts.

    Raises:
        RuntimeError: On SDK failure, error result subtype, or empty
            response once retries are exhausted.
    """
    self._ensure_sdk_loaded()
    self._emit_observability_event(
        observability,
        agent_name,
        task_id,
        "request_start",
        {
            "model": model,
            "max_turns": max_turns,
            "cwd": cwd,
            "prompt_chars": len(prompt),
            "system_prompt_chars": len(system_prompt) if system_prompt else 0,
        },
    )
    options_kwargs: dict[str, Any] = {"max_turns": max_turns}
    if model:
        options_kwargs["model"] = model
    if system_prompt:
        options_kwargs["system_prompt"] = system_prompt
    if cwd:
        options_kwargs["cwd"] = cwd
    effective_env = dict(self._env)
    if env:
        effective_env.update(env)
    # May redirect HOME to a project-local fallback when ~/.claude is read-only.
    effective_env = self._ensure_claude_home_writable(effective_env, cwd=cwd)
    if effective_env:
        options_kwargs["env"] = effective_env
    total_attempts = len(self._RATE_LIMIT_RETRY_TIME_MARKS_SECONDS) + 1
    for attempt in range(total_attempts):
        self._emit_observability_event(
            observability,
            agent_name,
            task_id,
            "attempt_start",
            {"attempt": attempt + 1, "total_attempts": total_attempts},
        )
        debug_stderr = None
        if self._enable_debug:
            # Fresh capture file per attempt; closed in the finally below.
            debug_stderr = tempfile.TemporaryFile(mode="w+t", encoding="utf-8")
        attempt_options_kwargs = dict(options_kwargs)
        if debug_stderr is not None:
            attempt_options_kwargs["debug_stderr"] = debug_stderr
            attempt_options_kwargs["extra_args"] = {"debug-to-stderr": None}
        options = self._options_cls(**attempt_options_kwargs)
        assistant_parts: list[str] = []
        result_text: str | None = None
        usage: dict[str, Any] | None = None
        error_text: str | None = None
        result_subtype: str | None = None
        session_id: str | None = None
        stderr_detail = ""
        try:
            async for msg in self._query(prompt=prompt, options=options):
                session_id = self._record_stream_message(
                    msg=msg,
                    observability=observability,
                    agent_name=agent_name,
                    task_id=task_id,
                    current_session_id=session_id,
                )
                content = getattr(msg, "content", None)
                # Only assistant messages contain model output content.
                if content and hasattr(msg, "model"):
                    for block in content:
                        text = getattr(block, "text", None)
                        if text:
                            assistant_parts.append(text)
                msg_result = getattr(msg, "result", None)
                if isinstance(msg_result, str) and msg_result.strip():
                    result_text = msg_result
                msg_subtype = getattr(msg, "subtype", None)
                if isinstance(msg_subtype, str):
                    result_subtype = msg_subtype
                msg_usage = getattr(msg, "usage", None)
                if isinstance(msg_usage, dict):
                    usage = msg_usage
                if getattr(msg, "is_error", False):
                    error_text = msg_result if isinstance(msg_result, str) else "Claude SDK error"
            stderr_detail = self._combine_stderr_details(self._read_debug_stderr(debug_stderr))
        except Exception as exc:
            stderr_detail = self._combine_stderr_details(
                self._read_debug_stderr(debug_stderr),
                self._extract_exception_stderr(exc),
            )
            error_message = self._format_error(
                f"Claude SDK query failed: {exc}",
                stderr_detail,
                add_hint=True,
            )
            should_retry = await self._should_retry_rate_limit_error(error_message, attempt)
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "request_error",
                {
                    "attempt": attempt + 1,
                    "error": self._truncate_text(str(exc)),
                    "retrying": should_retry,
                    "stderr": self._truncate_text(stderr_detail),
                },
            )
            if should_retry:
                continue
            raise RuntimeError(error_message) from exc
        finally:
            if debug_stderr is not None:
                debug_stderr.close()
        if error_text:
            # SDK flagged an error message during streaming.
            error_message = self._format_error(error_text, stderr_detail, add_hint=True)
            should_retry = await self._should_retry_rate_limit_error(error_message, attempt)
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "request_error",
                {
                    "attempt": attempt + 1,
                    "error": self._truncate_text(error_text),
                    "retrying": should_retry,
                    "stderr": self._truncate_text(stderr_detail),
                },
            )
            if should_retry:
                continue
            raise RuntimeError(error_message)
        if result_subtype and "error" in result_subtype.lower():
            # Result subtype such as "error_max_turns" — treat as a failure.
            error_message = self._format_error(
                f"Claude SDK execution ended with subtype '{result_subtype}'.",
                stderr_detail,
                add_hint=True,
            )
            should_retry = await self._should_retry_rate_limit_error(error_message, attempt)
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "request_error",
                {
                    "attempt": attempt + 1,
                    "error": f"result subtype={result_subtype}",
                    "retrying": should_retry,
                    "stderr": self._truncate_text(stderr_detail),
                },
            )
            if should_retry:
                continue
            raise RuntimeError(error_message)
        # Prefer the final result message; fall back to streamed assistant text.
        text = (result_text or "\n".join(assistant_parts)).strip()
        if not text:
            error_message = self._format_error(
                "Claude SDK returned empty response",
                stderr_detail,
                add_hint=True,
            )
            should_retry = await self._should_retry_rate_limit_error(error_message, attempt)
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "request_error",
                {
                    "attempt": attempt + 1,
                    "error": "empty response",
                    "retrying": should_retry,
                    "stderr": self._truncate_text(stderr_detail),
                },
            )
            if should_retry:
                continue
            raise RuntimeError(error_message)
        input_tokens, output_tokens = self._extract_token_counts(usage)
        self._emit_observability_event(
            observability,
            agent_name,
            task_id,
            "request_complete",
            {
                "attempt": attempt + 1,
                "session_id": session_id,
                "result_subtype": result_subtype,
                "result_preview": self._truncate_text(text, max_chars=180),
                "input_tokens": input_tokens,
                "output_tokens": output_tokens,
                "usage": self._sanitize_payload(usage),
            },
        )
        return ClaudeCompletion(
            text=text,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            raw_usage=usage,
        )
    # Unreachable: every loop path either returns, raises, or continues,
    # and the final attempt never continues.
    raise RuntimeError("Claude SDK retry loop exhausted unexpectedly")
def _ensure_sdk_loaded(self) -> None:
    """Resolve SDK symbols on first use; no-op once both are cached."""
    if self._query is None or self._options_cls is None:
        self._query, self._options_cls = _load_sdk()
def _extract_token_counts(self, usage: dict[str, Any] | None) -> tuple[int, int]:
"""Best-effort token extraction across SDK usage payload variants."""
if not isinstance(usage, dict):
return 0, 0
input_tokens = self._to_int(
usage.get("input_tokens") or usage.get("inputTokens")
)
output_tokens = self._to_int(
usage.get("output_tokens") or usage.get("outputTokens")
)
if input_tokens == 0:
input_tokens = sum(
self._to_int(v)
for k, v in usage.items()
if "input" in k.lower() and "output" not in k.lower()
)
if output_tokens == 0:
output_tokens = sum(
self._to_int(v) for k, v in usage.items() if "output" in k.lower()
)
return input_tokens, output_tokens
def _ensure_claude_home_writable(
    self,
    env: dict[str, str],
    *,
    cwd: str | None = None,
) -> dict[str, str]:
    """Fallback to a project-local HOME when ~/.claude paths are not writable.

    Probes the HOME from *env* (or the process default); when the Claude
    config paths there cannot be written, prepares a fallback HOME under
    <cwd>/.app_factory/claude_home and points HOME at it.

    Args:
        env: Environment for the SDK invocation (not mutated).
        cwd: Base directory for the fallback; defaults to os.getcwd().

    Returns:
        A copy of *env*, possibly with HOME overridden.
    """
    effective = dict(env)
    current_home = Path(effective.get("HOME") or str(Path.home())).expanduser()
    if self._claude_home_is_writable(current_home):
        return effective
    fallback_root = Path(cwd or os.getcwd()) / ".app_factory" / "claude_home"
    fallback_home = self._prepare_fallback_claude_home(
        source_home=current_home,
        fallback_home=fallback_root,
    )
    effective["HOME"] = str(fallback_home)
    logger.warning(
        "Claude home '%s' is not writable; using fallback HOME at '%s'.",
        current_home,
        fallback_home,
    )
    return effective
@staticmethod
def _claude_home_is_writable(home: Path) -> bool:
claude_dir = home / ".claude"
required_dirs = [claude_dir, claude_dir / "todos", claude_dir / "debug"]
config_file = home / ".claude.json"
try:
for directory in required_dirs:
directory.mkdir(parents=True, exist_ok=True)
probe = directory / ".app_factory_write_probe"
probe.write_text("ok", encoding="utf-8")
probe.unlink()
config_file.touch(exist_ok=True)
with open(config_file, "a", encoding="utf-8"):
pass
except OSError:
return False
return True
@staticmethod
def _prepare_fallback_claude_home(source_home: Path, fallback_home: Path) -> Path:
    """Create the fallback HOME layout and best-effort copy existing Claude config.

    Builds .claude/todos and .claude/debug under *fallback_home*, then copies
    .claude.json and the .claude directory from *source_home* when readable.
    Copy failures are swallowed so an unreadable original never blocks the
    fallback.

    Returns:
        The prepared *fallback_home* path.
    """
    fallback_home.mkdir(parents=True, exist_ok=True)
    fallback_claude_dir = fallback_home / ".claude"
    (fallback_claude_dir / "todos").mkdir(parents=True, exist_ok=True)
    (fallback_claude_dir / "debug").mkdir(parents=True, exist_ok=True)
    source_claude_dir = source_home / ".claude"
    source_config = source_home / ".claude.json"
    target_config = fallback_home / ".claude.json"
    if source_config.exists() and source_config.is_file() and os.access(source_config, os.R_OK):
        try:
            shutil.copy2(source_config, target_config)
        except OSError:
            # Best-effort: keep whatever state the fallback already has.
            pass
    else:
        # No readable source config — start from an empty one.
        target_config.touch(exist_ok=True)
    if source_claude_dir.exists() and source_claude_dir.is_dir() and os.access(
        source_claude_dir, os.R_OK
    ):
        try:
            shutil.copytree(source_claude_dir, fallback_claude_dir, dirs_exist_ok=True)
        except OSError:
            pass
    return fallback_home
@staticmethod
def _to_int(value: Any) -> int:
try:
return int(value)
except (TypeError, ValueError):
return 0
@staticmethod
def _read_debug_stderr(debug_stderr: Any) -> str:
if debug_stderr is None:
return ""
try:
debug_stderr.flush()
debug_stderr.seek(0)
value = debug_stderr.read()
if isinstance(value, str):
return value.strip()
except Exception:
pass
return ""
@staticmethod
def _extract_exception_stderr(exc: Exception) -> str:
stderr = getattr(exc, "stderr", None)
return stderr.strip() if isinstance(stderr, str) else ""
@staticmethod
def _combine_stderr_details(*details: str) -> str:
merged: list[str] = []
seen: set[str] = set()
for detail in details:
value = detail.strip() if isinstance(detail, str) else ""
if not value or value in seen:
continue
seen.add(value)
merged.append(value)
if not merged:
return ""
placeholder = "Check stderr output for details"
non_placeholder = [detail for detail in merged if placeholder not in detail]
preferred = non_placeholder if non_placeholder else merged
return "\n\n".join(preferred)
async def _should_retry_rate_limit_error(self, error_message: str, attempt: int) -> bool:
if attempt >= len(self._RATE_LIMIT_RETRY_TIME_MARKS_SECONDS):
return False
text = error_message.lower()
retryable_tokens = (
"rate_limit_event",
"rate limit",
"rate-limited",
"too many requests",
"status code: 429",
"status code 429",
)
if not any(token in text for token in retryable_tokens):
return False
time_marks = self._RATE_LIMIT_RETRY_TIME_MARKS_SECONDS
target_mark = time_marks[attempt]
previous_mark = time_marks[attempt - 1] if attempt > 0 else 0.0
delay = max(target_mark - previous_mark, 0.0)
logger.warning(
"Claude SDK rate limit/transient event detected (attempt %d/%d). "
"Retrying in %.1fs (target %.1fs from first failure).",
attempt + 1,
len(self._RATE_LIMIT_RETRY_TIME_MARKS_SECONDS) + 1,
delay,
target_mark,
)
await asyncio.sleep(delay)
return True
@staticmethod
def _format_error(message: str, stderr_detail: str, add_hint: bool = False) -> str:
hint = ""
if add_hint:
hint = (
"\nHint: verify Claude auth is available (ANTHROPIC_API_KEY or "
"ANTHROPIC_AUTH_TOKEN, or a valid Claude Code OAuth session) and that the "
"process can write ~/.claude and ~/.claude.json."
)
if stderr_detail:
return f"{message}\nSDK stderr:\n{stderr_detail}{hint}"
return f"{message}{hint}"
def _record_stream_message(
    self,
    *,
    msg: Any,
    observability: Any | None,
    agent_name: str,
    task_id: str,
    current_session_id: str | None,
) -> str | None:
    """Emit an observability event describing one streamed SDK message.

    Message kinds are distinguished by duck typing: a stream event carries
    `event` + `uuid`; assistant/user messages carry `content` (assistant
    also has `model`); result messages carry a `subtype` + `duration_ms`;
    system messages carry a `subtype` + `data`.

    Returns:
        The session id seen so far (the message's own, or the carried-over
        *current_session_id*).
    """
    session_id = getattr(msg, "session_id", None) or current_session_id
    parent_tool_use_id = getattr(msg, "parent_tool_use_id", None)
    stream_event = getattr(msg, "event", None)
    if stream_event is not None and hasattr(msg, "uuid"):
        stream_event_type = None
        if isinstance(stream_event, dict):
            stream_event_type = stream_event.get("type") or stream_event.get("event")
        self._emit_observability_event(
            observability,
            agent_name,
            task_id,
            "stream_event",
            {
                "session_id": session_id,
                "stream_event_type": stream_event_type,
                "parent_tool_use_id": parent_tool_use_id,
            },
        )
        # Stream events carry nothing else worth recording.
        return session_id
    content = getattr(msg, "content", None)
    if content:
        # Only assistant messages expose a `model` attribute.
        is_assistant_message = hasattr(msg, "model")
        self._emit_observability_event(
            observability,
            agent_name,
            task_id,
            "assistant_message" if is_assistant_message else "user_message",
            {
                "session_id": session_id,
                "parent_tool_use_id": parent_tool_use_id,
                "model": getattr(msg, "model", None),
                "content_block_count": len(content) if isinstance(content, list) else 1,
            },
        )
        if isinstance(content, list):
            for block in content:
                self._record_content_block(
                    block=block,
                    observability=observability,
                    agent_name=agent_name,
                    task_id=task_id,
                    session_id=session_id,
                    parent_tool_use_id=parent_tool_use_id,
                )
    subtype = getattr(msg, "subtype", None)
    if isinstance(subtype, str):
        if hasattr(msg, "duration_ms"):
            # Final result message: record timing/cost/usage metadata.
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "result_message",
                {
                    "session_id": session_id,
                    "subtype": subtype,
                    "is_error": bool(getattr(msg, "is_error", False)),
                    "num_turns": getattr(msg, "num_turns", None),
                    "duration_ms": getattr(msg, "duration_ms", None),
                    "duration_api_ms": getattr(msg, "duration_api_ms", None),
                    "total_cost_usd": getattr(msg, "total_cost_usd", None),
                    "usage": self._sanitize_payload(getattr(msg, "usage", None)),
                },
            )
        elif hasattr(msg, "data"):
            self._emit_observability_event(
                observability,
                agent_name,
                task_id,
                "system_message",
                {
                    "session_id": session_id,
                    "subtype": subtype,
                    "data": self._sanitize_payload(getattr(msg, "data", None)),
                },
            )
    return session_id
def _record_content_block(
    self,
    *,
    block: Any,
    observability: Any | None,
    agent_name: str,
    task_id: str,
    session_id: str | None,
    parent_tool_use_id: str | None,
) -> None:
    """Emit an observability event for one content block of a message.

    Block kind is detected by attribute shape, first match wins:
    tool use (name + input + id), tool result (tool_use_id), text block
    (text), thinking block (thinking). Unrecognized blocks are ignored.
    """
    block_name = getattr(block, "name", None)
    block_input = getattr(block, "input", None)
    block_id = getattr(block, "id", None)
    if block_name is not None and block_input is not None and block_id is not None:
        self._emit_observability_event(
            observability,
            agent_name,
            task_id,
            "tool_use",
            {
                "session_id": session_id,
                "tool_use_id": block_id,
                "parent_tool_use_id": parent_tool_use_id,
                "tool_name": str(block_name),
                "tool_input": self._sanitize_payload(block_input),
            },
        )
        return
    tool_use_id = getattr(block, "tool_use_id", None)
    if tool_use_id is not None:
        content = getattr(block, "content", None)
        self._emit_observability_event(
            observability,
            agent_name,
            task_id,
            "tool_result",
            {
                "session_id": session_id,
                "tool_use_id": tool_use_id,
                "parent_tool_use_id": parent_tool_use_id,
                "is_error": bool(getattr(block, "is_error", False)),
                "content": self._sanitize_payload(content),
            },
        )
        return
    text = getattr(block, "text", None)
    if isinstance(text, str) and text:
        self._emit_observability_event(
            observability,
            agent_name,
            task_id,
            "text_block",
            {
                "session_id": session_id,
                "chars": len(text),
                "preview": self._truncate_text(text),
            },
        )
        return
    thinking = getattr(block, "thinking", None)
    if isinstance(thinking, str) and thinking:
        # Thinking content itself is not logged — only its size.
        self._emit_observability_event(
            observability,
            agent_name,
            task_id,
            "thinking_block",
            {
                "session_id": session_id,
                "chars": len(thinking),
            },
        )
def _emit_observability_event(
self,
observability: Any | None,
agent_name: str,
task_id: str,
event_type: str,
payload: dict[str, Any] | None = None,
) -> None:
if observability is None:
return
log_method = getattr(observability, "log_claude_event", None)
if not callable(log_method):
return
try:
log_method(
agent_name=agent_name,
task_id=task_id,
event_type=event_type,
payload=self._sanitize_payload(payload),
)
except Exception:
# Observability should never break execution.
logger.debug("Failed to emit observability event", exc_info=True)
@classmethod
def _is_sensitive_key(cls, key: Any) -> bool:
if not isinstance(key, str):
return False
lowered = key.lower()
return any(token in lowered for token in cls._SENSITIVE_KEY_TOKENS)
@classmethod
def _sanitize_payload(cls, value: Any, *, _depth: int = 0) -> Any:
if _depth >= 4:
return "[truncated]"
if isinstance(value, dict):
sanitized: dict[str, Any] = {}
for idx, (k, v) in enumerate(value.items()):
if idx >= 40:
sanitized["__truncated_items__"] = len(value) - 40
break
key = str(k)
if cls._is_sensitive_key(key):
sanitized[key] = "[REDACTED]"
else:
sanitized[key] = cls._sanitize_payload(v, _depth=_depth + 1)
return sanitized
if isinstance(value, (list, tuple)):
items = [cls._sanitize_payload(v, _depth=_depth + 1) for v in value[:40]]
if len(value) > 40:
items.append(f"...({len(value) - 40} more)")
return items
if isinstance(value, str):
return cls._truncate_text(value)
if isinstance(value, (int, float, bool)) or value is None:
return value
return cls._truncate_text(str(value))
@staticmethod
def _truncate_text(value: str, max_chars: int = 400) -> str:
if not isinstance(value, str):
return ""
trimmed = value.strip()
if len(trimmed) <= max_chars:
return trimmed
return f"{trimmed[:max_chars]}...({len(trimmed) - max_chars} more chars)"

444
app_factory/core/graph.py Normal file
View File

@@ -0,0 +1,444 @@
"""Graph Orchestrator - LangGraph-based multi-agent workflow orchestration."""
import asyncio
import json
import logging
import os
from typing import TypedDict
from langgraph.graph import END, START, StateGraph
logger = logging.getLogger(__name__)
class AppFactoryState(TypedDict):
    """Global state passed through the orchestration graph."""

    user_input: str  # Raw user prompt given to run()
    prd: str  # Product requirements document produced by the PM agent
    tasks: list  # All tasks from task-master
    active_tasks: dict  # task_id -> {status, container_id, worktree_path}
    completed_tasks: list  # List of completed task_ids
    blocked_tasks: dict  # task_id -> reason
    clarification_requests: list  # Pending clarification dicts
    global_architecture: str  # Architecture summary for dev agents
    iteration_count: int  # Safety counter to prevent infinite loops
    max_iterations: int  # Max loop iterations (default 50)
    errors: list  # Error log
class AppFactoryOrchestrator:
    """Main LangGraph state machine for the App Factory."""

    def __init__(
        self,
        pm_agent=None,
        task_agent=None,
        dev_manager=None,
        qa_agent=None,
        workspace_manager=None,
        observability=None,
    ):
        """Store collaborating agents; leaving any as None enables mock/no-op paths.

        Args:
            pm_agent: Expands prompts to PRDs and answers clarifications.
            task_agent: Parses PRDs into tasks and tracks task status.
            dev_manager: Executes dev tasks (with retry) in containers.
            qa_agent: Reviews and merges completed work.
            workspace_manager: Creates worktrees/containers and cleans up.
            observability: Optional logger for state transitions.
        """
        self.pm_agent = pm_agent
        self.task_agent = task_agent
        self.dev_manager = dev_manager
        self.qa_agent = qa_agent
        self.workspace_manager = workspace_manager
        self.observability = observability
def build_graph(self) -> StateGraph:
    """Build and compile the LangGraph StateGraph with nodes and edges.

    Flow: START -> pm_node -> task_node -> dev_dispatch_node -> qa_node,
    with qa_node looping back to task_node, and clarification_node handling
    blocked/questioned tasks before re-entering task_node.

    Returns:
        The compiled graph, ready for ainvoke().
    """
    graph = StateGraph(AppFactoryState)
    graph.add_node("pm_node", self._pm_node)
    graph.add_node("task_node", self._task_node)
    graph.add_node("dev_dispatch_node", self._dev_dispatch_node)
    graph.add_node("qa_node", self._qa_node)
    graph.add_node("clarification_node", self._clarification_node)
    graph.add_edge(START, "pm_node")
    graph.add_conditional_edges(
        "pm_node",
        self._should_continue_after_pm,
        {
            "task_node": "task_node",
            "end": END,
        },
    )
    graph.add_conditional_edges(
        "task_node",
        self._should_continue_after_tasks,
        {
            "dev_dispatch": "dev_dispatch_node",
            "end": END,
            "clarification": "clarification_node",
        },
    )
    # Dev output always flows into QA.
    graph.add_edge("dev_dispatch_node", "qa_node")
    graph.add_conditional_edges(
        "qa_node",
        self._should_continue_after_qa,
        {
            "task_node": "task_node",
            "clarification": "clarification_node",
            "end": END,
        },
    )
    # Clarifications re-enter scheduling.
    graph.add_edge("clarification_node", "task_node")
    return graph.compile()
def _should_continue_after_pm(self, state: dict) -> str:
"""Routing function after pm_node: 'task_node' | 'end'."""
prd = state.get("prd", "")
if prd and prd.strip():
return "task_node"
# PM failure (or empty prompt) yields no PRD and should terminate cleanly.
return "end"
def _should_continue_after_tasks(self, state: dict) -> str:
"""Routing function after task_node: 'dev_dispatch' | 'end' | 'clarification'."""
if state.get("iteration_count", 0) >= state.get("max_iterations", 50):
return "end"
tasks = state.get("tasks", [])
completed = set(state.get("completed_tasks", []))
all_task_ids = {str(t.get("id", "")) for t in tasks}
# Check if all tasks are done
if all_task_ids and all_task_ids <= completed:
return "end"
# Check for unblocked tasks (pending tasks with all deps done)
unblocked = []
for t in tasks:
if str(t.get("id", "")) in completed:
continue
if t.get("status") == "done":
continue
deps = [str(d) for d in t.get("dependencies", [])]
if all(d in completed for d in deps):
unblocked.append(t)
if unblocked:
return "dev_dispatch"
# No unblocked tasks - if there are blocked ones, try clarification
if state.get("blocked_tasks") or state.get("clarification_requests"):
return "clarification"
# No tasks at all or nothing left to do
return "end"
def _should_continue_after_qa(self, state: dict) -> str:
"""Routing function after qa_node: 'task_node' | 'clarification' | 'end'."""
if state.get("iteration_count", 0) >= state.get("max_iterations", 50):
return "end"
if state.get("clarification_requests"):
return "clarification"
# Loop back to check for newly unblocked tasks
return "task_node"
async def _pm_node(self, state: dict) -> dict:
"""Call PM agent to expand user input into a PRD."""
if self.observability:
self.observability.log_state_transition("start", "pm_node")
user_input = state.get("user_input", "")
if not user_input:
return {"prd": "", "errors": state.get("errors", []) + ["No user input provided"]}
if self.pm_agent is None:
return {"prd": f"Mock PRD for: {user_input}"}
try:
prd = await self.pm_agent.expand_prompt_to_prd(user_input)
return {"prd": prd}
except Exception as e:
logger.error("PM agent failed: %s", e)
return {"prd": "", "errors": state.get("errors", []) + [f"PM agent error: {e}"]}
async def _task_node(self, state: dict) -> dict:
"""Parse PRD into tasks or get unblocked tasks. Increments iteration_count."""
if self.observability:
self.observability.log_state_transition("pm_node/qa_node/clarification_node", "task_node")
iteration_count = state.get("iteration_count", 0) + 1
updates = {"iteration_count": iteration_count}
if iteration_count >= state.get("max_iterations", 50):
updates["errors"] = state.get("errors", []) + ["Max iterations reached"]
return updates
if self.task_agent is None:
return updates
try:
existing_tasks = state.get("tasks", [])
if not existing_tasks:
# First pass - parse the PRD
prd = state.get("prd", "")
if prd:
await self.task_agent.parse_prd(prd)
unblocked = await self.task_agent.get_unblocked_tasks()
updates["tasks"] = unblocked
else:
# Subsequent passes - refresh unblocked tasks
unblocked = await self.task_agent.get_unblocked_tasks()
updates["tasks"] = unblocked
except Exception as e:
logger.error("Task agent failed: %s", e)
updates["errors"] = state.get("errors", []) + [f"Task agent error: {e}"]
return updates
async def _dev_dispatch_node(self, state: dict) -> dict:
    """Dispatch dev agents concurrently for unblocked tasks.

    Selects tasks whose dependencies are all completed and that are not
    already active, then runs each in its own worktree + container via
    the dev manager. With no dev/workspace manager configured, tasks are
    marked successful immediately (mock path for testing).

    Returns:
        Partial state update with active_tasks, completed_tasks, errors,
        and clarification_requests (empty dict when nothing to run).
    """
    if self.observability:
        self.observability.log_state_transition("task_node", "dev_dispatch_node")
    tasks = state.get("tasks", [])
    completed = set(state.get("completed_tasks", []))
    active_tasks = dict(state.get("active_tasks", {}))
    errors = list(state.get("errors", []))
    clarification_requests = list(state.get("clarification_requests", []))
    global_arch = state.get("global_architecture", "")
    # Filter to unblocked, not-yet-completed tasks
    to_execute = []
    for t in tasks:
        tid = str(t.get("id", ""))
        if tid in completed or tid in active_tasks:
            continue
        deps = [str(d) for d in t.get("dependencies", [])]
        if all(d in completed for d in deps):
            to_execute.append(t)
    if not to_execute:
        return {}
    if self.dev_manager is None or self.workspace_manager is None:
        # Mock execution for testing
        new_completed = list(completed)
        for t in to_execute:
            tid = str(t.get("id", ""))
            active_tasks[tid] = {"status": "success", "container_id": "mock", "worktree_path": "/mock"}
            new_completed.append(tid)
        return {"active_tasks": active_tasks, "completed_tasks": new_completed}

    async def _execute_single(task):
        # Run one task end-to-end; never raises — failures become a
        # {"status": "failed", ...} result so gather() stays clean.
        tid = str(task.get("id", ""))
        worktree_path = None
        container = None
        try:
            worktree_path = await self.workspace_manager.create_worktree(tid)
            container = await self.workspace_manager.spin_up_clean_room(worktree_path, tid)
            container_id = container.id
            if self.task_agent:
                await self.task_agent.update_task_status(tid, "in-progress")
            result = await self.dev_manager.execute_with_retry(
                task, container_id, worktree_path, global_arch
            )
            return tid, result, worktree_path
        except Exception as e:
            logger.error("Dev dispatch failed for task %s: %s", tid, e)
            return tid, {"status": "failed", "output": str(e), "files_changed": [], "exit_code": -1}, worktree_path

    # Execute concurrently
    results = await asyncio.gather(*[_execute_single(t) for t in to_execute], return_exceptions=True)
    new_completed = list(completed)
    for item in results:
        if isinstance(item, Exception):
            # Defensive: _execute_single catches internally, but
            # return_exceptions=True guards against anything unexpected.
            errors.append(f"Dev dispatch exception: {item}")
            continue
        tid, result, worktree_path = item
        status = result.get("status", "failed")
        active_tasks[tid] = {
            "status": status,
            "container_id": result.get("container_id", ""),
            "worktree_path": worktree_path or "",
        }
        if status == "success":
            new_completed.append(tid)
        elif status == "needs_clarification":
            # Dev agent exhausted retries — escalate to the PM.
            clarification_requests.append({
                "requesting_agent": "dev_agent",
                "task_id": tid,
                "question": f"Task {tid} failed after retries. Output: {result.get('output', '')[:500]}",
                "context": result.get("output", "")[:1000],
            })
    return {
        "active_tasks": active_tasks,
        "completed_tasks": new_completed,
        "errors": errors,
        "clarification_requests": clarification_requests,
    }
async def _qa_node(self, state: dict) -> dict:
    """Run QA on completed dev tasks.

    For each dev-successful task, asks the QA agent to review and merge
    its worktree. Merged tasks are marked done; failures either escalate
    to a clarification request (retry budget exhausted) or move the task
    back to blocked (and out of completed) for another attempt. The
    workspace is cleaned up after each QA pass.

    Returns:
        Partial state update ({} when there is nothing to review or no
        QA agent is configured).
    """
    if self.observability:
        self.observability.log_state_transition("dev_dispatch_node", "qa_node")
    active_tasks = dict(state.get("active_tasks", {}))
    completed = list(state.get("completed_tasks", []))
    errors = list(state.get("errors", []))
    clarification_requests = list(state.get("clarification_requests", []))
    blocked_tasks = dict(state.get("blocked_tasks", {}))
    # Find tasks that were successfully completed by dev and need QA
    tasks_for_qa = []
    for tid, info in active_tasks.items():
        if info.get("status") == "success" and tid in completed:
            tasks_for_qa.append((tid, info))
    if not tasks_for_qa or self.qa_agent is None:
        return {}
    for tid, info in tasks_for_qa:
        worktree_path = info.get("worktree_path", "")
        if not worktree_path:
            continue
        try:
            # Find the task dict for context
            task_dict = None
            for t in state.get("tasks", []):
                if str(t.get("id", "")) == tid:
                    task_dict = t
                    break
            qa_result = await self.qa_agent.review_and_merge(tid, worktree_path, task=task_dict)
            qa_status = qa_result.get("status", "")
            if qa_status == "merged":
                # Successfully merged - update task status
                if self.task_agent:
                    await self.task_agent.update_task_status(tid, "done")
                active_tasks[tid]["status"] = "merged"
            else:
                # QA failed - may need clarification or retry
                retry_count = qa_result.get("retry_count", 0)
                if retry_count >= (self.qa_agent.max_retries if self.qa_agent else 3):
                    # Out of retries — escalate to the PM for a decision.
                    clarification_requests.append({
                        "requesting_agent": "qa_agent",
                        "task_id": tid,
                        "question": f"QA failed for task {tid} with status '{qa_status}'",
                        "context": str(qa_result),
                    })
                else:
                    blocked_tasks[tid] = f"QA {qa_status}: {qa_result}"
                    # Remove from completed so it can be retried
                    if tid in completed:
                        completed.remove(tid)
                active_tasks[tid]["status"] = qa_status
            # Cleanup workspace after QA
            if self.workspace_manager:
                try:
                    await self.workspace_manager.cleanup_workspace(tid)
                except Exception as e:
                    # Cleanup is best-effort; a leak is logged, not fatal.
                    logger.warning("Workspace cleanup failed for task %s: %s", tid, e)
        except Exception as e:
            logger.error("QA failed for task %s: %s", tid, e)
            errors.append(f"QA error for task {tid}: {e}")
    return {
        "active_tasks": active_tasks,
        "completed_tasks": completed,
        "errors": errors,
        "clarification_requests": clarification_requests,
        "blocked_tasks": blocked_tasks,
    }
async def _clarification_node(self, state: dict) -> dict:
"""Handle clarification requests via PM agent."""
if self.observability:
self.observability.log_state_transition("task_node/qa_node", "clarification_node")
requests = list(state.get("clarification_requests", []))
blocked_tasks = dict(state.get("blocked_tasks", {}))
errors = list(state.get("errors", []))
if not requests:
return {"clarification_requests": []}
if self.pm_agent is None:
# Clear requests without processing for testing
return {"clarification_requests": [], "blocked_tasks": {}}
resolved = []
remaining = []
for req in requests:
try:
answer = await self.pm_agent.handle_clarification_request(req)
tid = req.get("task_id", "")
if tid and tid in blocked_tasks:
del blocked_tasks[tid]
resolved.append({"request": req, "answer": answer})
except Exception as e:
logger.error("Clarification failed: %s", e)
errors.append(f"Clarification error: {e}")
remaining.append(req)
return {
"clarification_requests": remaining,
"blocked_tasks": blocked_tasks,
"errors": errors,
}
async def run(self, user_input: str) -> dict:
"""Build graph and execute with initial state."""
compiled = self.build_graph()
initial_state = {
"user_input": user_input,
"prd": "",
"tasks": [],
"active_tasks": {},
"completed_tasks": [],
"blocked_tasks": {},
"clarification_requests": [],
"global_architecture": "",
"iteration_count": 0,
"max_iterations": 50,
"errors": [],
}
if self.observability:
self.observability.log_state_transition("init", "run")
result = await compiled.ainvoke(initial_state)
self.save_state(result)
return result
def save_state(self, state: dict, path: str = "app_factory/data/state.json"):
"""Persist state to disk."""
os.makedirs(os.path.dirname(path), exist_ok=True)
# Convert to JSON-serializable form
serializable = {}
for k, v in state.items():
try:
json.dumps(v)
serializable[k] = v
except (TypeError, ValueError):
serializable[k] = str(v)
with open(path, "w") as f:
json.dump(serializable, f, indent=2)
def load_state(self, path: str = "app_factory/data/state.json") -> dict:
"""Load state from disk."""
with open(path) as f:
return json.load(f)

View File

@@ -0,0 +1,83 @@
"""Logging formatters and helpers for colorized terminal output."""
import logging
import os
import sys
from typing import Optional, TextIO
# ANSI SGR escape sequences used to build colorized terminal output.
RESET = "\033[0m"
DIM = "\033[2m"
BOLD = "\033[1m"
FG_BLUE = "\033[34m"
FG_CYAN = "\033[36m"
FG_GREEN = "\033[32m"
FG_MAGENTA = "\033[35m"
FG_YELLOW = "\033[33m"
FG_RED = "\033[31m"
# Style applied to the levelname token for each logging level.
LEVEL_COLORS = {
    logging.DEBUG: f"{DIM}{FG_CYAN}",
    logging.INFO: FG_GREEN,
    logging.WARNING: FG_YELLOW,
    logging.ERROR: FG_RED,
    logging.CRITICAL: f"{BOLD}{FG_RED}",
}
def should_use_color(stream: Optional[TextIO] = None, use_color: Optional[bool] = None) -> bool:
    """Return whether ANSI colors should be used for the given stream.

    Precedence: an explicit *use_color* flag wins; then the NO_COLOR and
    FORCE_COLOR environment variables; then a "dumb" TERM disables color;
    finally the stream's (default: stderr's) tty status decides.
    """
    if use_color is not None:
        return use_color
    if os.getenv("NO_COLOR") is not None:
        return False
    forced = os.getenv("FORCE_COLOR", "").strip().lower()
    if forced not in {"", "0", "false", "no"}:
        return True
    if os.getenv("TERM", "").lower() == "dumb":
        return False
    checker = getattr(stream or sys.stderr, "isatty", None)
    return bool(checker and checker())
def colorize(text: str, style: str, enabled: bool) -> str:
    """Wrap *text* in *style* and RESET when coloring is enabled; else pass it through."""
    return f"{style}{text}{RESET}" if enabled and style else text
class LevelColorFormatter(logging.Formatter):
    """Formatter that colors only the log level token."""

    def __init__(
        self,
        fmt: Optional[str] = None,
        datefmt: Optional[str] = None,
        style: str = "%",
        *,
        stream: Optional[TextIO] = None,
        use_color: Optional[bool] = None,
    ):
        super().__init__(fmt=fmt, datefmt=datefmt, style=style)
        # Decide once, at construction, whether ANSI output is appropriate.
        self._use_color = should_use_color(stream=stream, use_color=use_color)

    def format(self, record: logging.LogRecord) -> str:
        """Format *record*, temporarily coloring its levelname when enabled."""
        if not self._use_color:
            return super().format(record)
        plain_level = record.levelname
        record.levelname = colorize(
            plain_level,
            LEVEL_COLORS.get(record.levelno, ""),
            enabled=True,
        )
        try:
            return super().format(record)
        finally:
            # Restore the record so other handlers see the uncolored level.
            record.levelname = plain_level

View File

@@ -0,0 +1,572 @@
"""Observability Manager - LangSmith tracing, logging, and monitoring."""
import contextlib
import functools
import inspect
import json
import logging
import os
import time
import traceback
import uuid
from collections import defaultdict
from datetime import datetime, timezone
from typing import Any, Callable, Optional
from app_factory.core.logging_utils import (
FG_BLUE,
FG_CYAN,
FG_MAGENTA,
LEVEL_COLORS,
colorize,
should_use_color,
)
class _StructuredFormatter(logging.Formatter):
    """Custom formatter: [ISO_TIMESTAMP] [AGENT] [TASK] [LEVEL] message"""

    # Message prefixes that receive an event-specific color when coloring is on.
    _EVENT_COLORS = {
        "State transition": FG_MAGENTA,
        "Token usage": FG_BLUE,
        "Claude event": FG_BLUE,
        "Trace started": FG_CYAN,
        "Trace ended": FG_CYAN,
    }

    def __init__(self, use_color: Optional[bool] = None):
        super().__init__()
        self._use_color = should_use_color(use_color=use_color)

    def _colorize_message(self, message: str) -> str:
        """Color the whole message when it starts with a known event prefix."""
        for prefix, style in self._EVENT_COLORS.items():
            if message.startswith(prefix):
                return colorize(message, style, self._use_color)
        return message

    def format(self, record: logging.LogRecord) -> str:
        """Render the structured line, pulling agent/task off the record's extras."""
        stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S")
        agent = getattr(record, "agent_name", "SYSTEM")
        task = getattr(record, "task_id", "-")
        level = record.levelname
        text = record.getMessage()
        if self._use_color:
            stamp = colorize(stamp, FG_BLUE, enabled=True)
            agent = colorize(agent, FG_CYAN, enabled=True)
            task = colorize(task, FG_MAGENTA, enabled=True)
            level = colorize(level, LEVEL_COLORS.get(record.levelno, ""), enabled=True)
            text = self._colorize_message(text)
        return f"[{stamp}] [{agent}] [{task}] [{level}] {text}"
class _TraceContext:
"""Async context manager for trace_context()."""
def __init__(self, manager: "ObservabilityManager", agent_name: str, task_id: str):
self._manager = manager
self._agent_name = agent_name
self._task_id = task_id
self._run_id: Optional[str] = None
async def __aenter__(self) -> str:
self._run_id = self._manager.start_trace(self._agent_name, self._task_id)
return self._run_id
async def __aexit__(self, exc_type, exc_val, exc_tb) -> bool:
if exc_val is not None:
self._manager.end_trace(
self._run_id, error=f"{exc_type.__name__}: {exc_val}"
)
else:
self._manager.end_trace(self._run_id)
return False # do not suppress exceptions
class ObservabilityManager:
    """Wraps LangSmith client for tracing and structured logging.

    Responsibilities:
    - Start/end LangSmith trace runs, degrading gracefully when LangSmith is
      unconfigured or unreachable (tracing calls become log-only).
    - Emit structured, optionally colorized log lines via a dedicated logger.
    - Accumulate in-process metrics (tokens, traces, errors, Claude events,
      tool calls), both globally and per agent.
    """
    # Which Claude event types are emitted per verbosity mode.
    # ``None`` means "log everything"; an empty set means "log nothing".
    _CLAUDE_EVENT_FILTERS = {
        "quiet": {
            "request_start",
            "request_error",
            "request_complete",
            "tool_use",
            "tool_result",
        },
        "focused": {
            "request_start",
            "request_error",
            "request_complete",
            "tool_use",
            "tool_result",
            "thinking_block",
            "result_message",
        },
        "verbose": None,  # no filtering
        "off": set(),
    }

    def __init__(self, project_name: str = None, claude_event_mode: str | None = None):
        """Initialize the manager.

        Args:
            project_name: LangSmith project name; falls back to the
                LANGSMITH_PROJECT env var, then "app-factory".
            claude_event_mode: One of the ``_CLAUDE_EVENT_FILTERS`` keys;
                falls back to the APP_FACTORY_CLAUDE_EVENT_MODE env var.
        """
        self.project_name = project_name or os.getenv("LANGSMITH_PROJECT", "app-factory")
        requested_mode = (
            claude_event_mode
            or os.getenv("APP_FACTORY_CLAUDE_EVENT_MODE", "quiet")
        )
        # NOTE(review): the env default is "quiet", but non-string or
        # unrecognized modes fall back to "focused" -- confirm this
        # asymmetry is intentional.
        normalized_mode = requested_mode.strip().lower() if isinstance(requested_mode, str) else "focused"
        self._claude_event_mode = (
            normalized_mode if normalized_mode in self._CLAUDE_EVENT_FILTERS else "focused"
        )
        # --- LangSmith client (optional) ---
        self._client = None
        try:
            from langsmith import Client  # noqa: F811
            self._client = Client()
        except Exception as exc:
            # LangSmith not configured or unreachable -- degrade gracefully
            logging.getLogger(__name__).warning(
                "LangSmith unavailable (%s). Tracing disabled.", exc
            )
        # --- Structured logger ---
        self.logger = logging.getLogger(f"app_factory.{self.project_name}")
        if not self.logger.handlers:
            # Configure only once per logger name to avoid duplicate handlers.
            handler = logging.StreamHandler()
            handler.setFormatter(_StructuredFormatter())
            self.logger.addHandler(handler)
            self.logger.setLevel(logging.DEBUG)
            self.logger.propagate = False
        # Correlation maps: tool_use_id -> tool name / input summary, so a
        # later tool_result event can name the tool that produced it.
        self._tool_name_by_use_id: dict[str, str] = {}
        self._tool_summary_by_use_id: dict[str, str] = {}
        # --- Internal metrics ---
        self._active_runs: dict[str, dict] = {}
        self._metrics = {
            "total_tokens": 0,
            "total_traces": 0,
            "total_errors": 0,
            "total_claude_events": 0,
            "total_tool_calls": 0,
            "per_agent": defaultdict(lambda: {
                "tokens": 0,
                "traces": 0,
                "errors": 0,
                "claude_events": 0,
                "tool_calls": 0,
            }),
        }

    # ------------------------------------------------------------------
    # Tracing
    # ------------------------------------------------------------------
    def start_trace(self, agent_name: str, task_id: str, inputs: dict = None) -> str:
        """Start a new trace run, return run_id."""
        run_id = uuid.uuid4().hex
        self._metrics["total_traces"] += 1
        self._metrics["per_agent"][agent_name]["traces"] += 1
        self._active_runs[run_id] = {
            "agent_name": agent_name,
            "task_id": task_id,
            "start_time": time.time(),
        }
        self.logger.info(
            "Trace started: run_id=%s",
            run_id,
            extra={"agent_name": agent_name, "task_id": task_id},
        )
        try:
            if self._client is not None:
                self._client.create_run(
                    name=f"{agent_name}:{task_id}",
                    run_type="chain",
                    inputs=inputs or {},
                    id=run_id,
                    project_name=self.project_name,
                )
        except Exception as exc:
            # LangSmith failures never break the caller; log and continue.
            self.logger.warning(
                "LangSmith create_run failed: %s",
                exc,
                extra={"agent_name": agent_name, "task_id": task_id},
            )
        return run_id

    def end_trace(self, run_id: str, outputs: dict = None, error: str = None):
        """End a trace run with outputs or error."""
        # pop() so unknown/duplicate run_ids degrade to "unknown" metadata.
        run_info = self._active_runs.pop(run_id, {})
        agent_name = run_info.get("agent_name", "unknown")
        task_id = run_info.get("task_id", "-")
        if error:
            self._metrics["total_errors"] += 1
            self._metrics["per_agent"][agent_name]["errors"] += 1
            self.logger.error(
                "Trace error: run_id=%s error=%s",
                run_id,
                error,
                extra={"agent_name": agent_name, "task_id": task_id},
            )
        else:
            self.logger.info(
                "Trace ended: run_id=%s",
                run_id,
                extra={"agent_name": agent_name, "task_id": task_id},
            )
        try:
            if self._client is not None:
                update_kwargs: dict[str, Any] = {"end_time": datetime.now(timezone.utc)}
                if outputs:
                    update_kwargs["outputs"] = outputs
                if error:
                    update_kwargs["error"] = error
                self._client.update_run(run_id, **update_kwargs)
        except Exception as exc:
            self.logger.warning(
                "LangSmith update_run failed: %s",
                exc,
                extra={"agent_name": agent_name, "task_id": task_id},
            )

    # ------------------------------------------------------------------
    # Decorator
    # ------------------------------------------------------------------
    def trace_agent_execution(self, agent_name: str, task_id: str):
        """Decorator for tracking agent calls with context.

        Works on both sync and async callables; the appropriate wrapper is
        chosen at decoration time.
        """
        def decorator(func: Callable):
            @functools.wraps(func)
            async def async_wrapper(*args, **kwargs):
                run_id = self.start_trace(agent_name, task_id, inputs={"args": str(args), "kwargs": str(kwargs)})
                try:
                    result = await func(*args, **kwargs)
                    self.end_trace(run_id, outputs={"result": str(result)})
                    return result
                except Exception as exc:
                    self.end_trace(run_id, error=f"{type(exc).__name__}: {exc}")
                    raise
            @functools.wraps(func)
            def sync_wrapper(*args, **kwargs):
                run_id = self.start_trace(agent_name, task_id, inputs={"args": str(args), "kwargs": str(kwargs)})
                try:
                    result = func(*args, **kwargs)
                    self.end_trace(run_id, outputs={"result": str(result)})
                    return result
                except Exception as exc:
                    self.end_trace(run_id, error=f"{type(exc).__name__}: {exc}")
                    raise
            if inspect.iscoroutinefunction(func):
                return async_wrapper
            return sync_wrapper
        return decorator

    # ------------------------------------------------------------------
    # Async helpers
    # ------------------------------------------------------------------
    async def trace_agent(self, agent_name: str, task_id: str, func: Callable):
        """Async helper to run a function within a trace context."""
        run_id = self.start_trace(agent_name, task_id)
        try:
            result = await func()
            self.end_trace(run_id, outputs={"result": str(result)})
            return result
        except Exception as exc:
            self.end_trace(run_id, error=f"{type(exc).__name__}: {exc}")
            raise

    def trace_context(self, agent_name: str, task_id: str) -> _TraceContext:
        """Return an async context manager for tracing.

        Usage::

            async with obs.trace_context("agent", "task_id") as run_id:
                ...
        """
        return _TraceContext(self, agent_name, task_id)

    # ------------------------------------------------------------------
    # Logging helpers
    # ------------------------------------------------------------------
    def log_state_transition(self, from_state: str, to_state: str, metadata: dict = None):
        """Log a state machine transition."""
        msg = f"State transition: {from_state} -> {to_state}"
        if metadata:
            msg += f" metadata={metadata}"
        self.logger.info(msg, extra={"agent_name": "STATE_MACHINE", "task_id": "-"})

    def log_token_usage(
        self,
        agent_name: str,
        task_id: str,
        input_tokens: int,
        output_tokens: int,
        model: str = None,
    ):
        """Log token usage for cost monitoring."""
        total = input_tokens + output_tokens
        self._metrics["total_tokens"] += total
        self._metrics["per_agent"][agent_name]["tokens"] += total
        msg = f"Token usage: input={input_tokens} output={output_tokens} total={total}"
        if model:
            msg += f" model={model}"
        self.logger.info(msg, extra={"agent_name": agent_name, "task_id": task_id})

    def log_error(self, agent_name: str, task_id: str, error: Exception, context: dict = None):
        """Log an error with full stack trace."""
        self._metrics["total_errors"] += 1
        self._metrics["per_agent"][agent_name]["errors"] += 1
        tb = traceback.format_exception(type(error), error, error.__traceback__)
        msg = f"Error: {error}\n{''.join(tb)}"
        if context:
            msg += f" context={context}"
        self.logger.error(msg, extra={"agent_name": agent_name, "task_id": task_id})

    def log_claude_event(
        self,
        agent_name: str,
        task_id: str,
        event_type: str,
        payload: dict | None = None,
    ):
        """Log a Claude SDK/CLI event in structured form.

        Metrics and tool-call correlation are always updated; whether a log
        line is actually emitted depends on the configured event mode.
        """
        self._metrics["total_claude_events"] += 1
        self._metrics["per_agent"][agent_name]["claude_events"] += 1
        normalized_event = (event_type or "unknown").strip().lower()
        normalized_payload = dict(payload or {})
        if normalized_event == "tool_use":
            self._metrics["total_tool_calls"] += 1
            self._metrics["per_agent"][agent_name]["tool_calls"] += 1
            tool_use_id = normalized_payload.get("tool_use_id")
            tool_name = normalized_payload.get("tool_name")
            tool_input = normalized_payload.get("tool_input")
            if isinstance(tool_use_id, str) and isinstance(tool_name, str):
                # Remember the tool name/input so the matching tool_result
                # event can be rendered with context.
                self._tool_name_by_use_id[tool_use_id] = tool_name
                self._tool_summary_by_use_id[tool_use_id] = self._summarize_tool_input(
                    str(tool_name),
                    tool_input,
                )
        if normalized_event == "tool_result":
            tool_use_id = normalized_payload.get("tool_use_id")
            if isinstance(tool_use_id, str):
                # pop() keeps the correlation maps from growing unboundedly.
                tool_name = self._tool_name_by_use_id.pop(tool_use_id, None)
                tool_summary = self._tool_summary_by_use_id.pop(tool_use_id, None)
                if "tool_name" not in normalized_payload and tool_name:
                    normalized_payload["tool_name"] = tool_name
                if "tool_input_summary" not in normalized_payload and tool_summary:
                    normalized_payload["tool_input_summary"] = tool_summary
        if not self._should_log_claude_event(normalized_event):
            return
        msg = self._format_claude_event_message(normalized_event, normalized_payload)
        if not msg:
            return
        self.logger.debug(msg, extra={"agent_name": agent_name, "task_id": task_id})

    def _should_log_claude_event(self, event_type: str) -> bool:
        """Return True when the current event mode allows this event type."""
        allowed = self._CLAUDE_EVENT_FILTERS.get(self._claude_event_mode)
        if allowed is None:
            return True  # verbose mode: log everything
        return event_type in allowed

    def _format_claude_event_message(self, event_type: str, payload: dict[str, Any]) -> str:
        """Render a one-line human-readable message for a Claude event.

        Returns an empty string for events that should be suppressed (e.g.
        successful results from noisy read-only tools).
        """
        session_id = payload.get("session_id")
        session_suffix = f" session={session_id}" if session_id else ""
        if event_type == "request_start":
            model = payload.get("model") or "default"
            prompt_chars = payload.get("prompt_chars", 0)
            return f"Claude request started: model={model} prompt_chars={prompt_chars}{session_suffix}"
        if event_type == "request_complete":
            inp = payload.get("input_tokens", 0)
            out = payload.get("output_tokens", 0)
            subtype = payload.get("result_subtype") or "unknown"
            preview = self._shorten_text(payload.get("result_preview"), max_chars=140)
            preview_fragment = f' result="{preview}"' if preview else ""
            return (
                f"Claude request completed: subtype={subtype} "
                f"tokens={inp}->{out}{preview_fragment}{session_suffix}"
            )
        if event_type == "request_error":
            err = self._shorten_text(payload.get("error"))
            retrying = payload.get("retrying")
            retry_fragment = " retrying=true" if retrying else ""
            return f"Claude request error: {err}{retry_fragment}{session_suffix}"
        if event_type == "tool_use":
            tool_name = payload.get("tool_name", "unknown_tool")
            tool_input = payload.get("tool_input")
            input_summary = self._summarize_tool_input(str(tool_name), tool_input)
            return f"Claude tool call: {tool_name} {input_summary}{session_suffix}"
        if event_type == "tool_result":
            tool_name = payload.get("tool_name", "tool")
            is_error = bool(payload.get("is_error", False))
            content = payload.get("content")
            input_summary = payload.get("tool_input_summary")
            input_fragment = f" {input_summary}" if input_summary else ""
            status = "error" if is_error else "ok"
            # Successful results from noisy read-only tools are suppressed
            # entirely; their errors are logged without full content.
            if self._is_noisy_tool_name(str(tool_name)) and not is_error:
                return ""
            if self._is_noisy_tool_name(str(tool_name)) and is_error:
                error_preview = self._shorten_text(content, max_chars=420)
                error_fragment = f" error={error_preview}" if error_preview else ""
                return (
                    f"Claude tool result: {tool_name} status={status}"
                    f"{input_fragment}{error_fragment}{session_suffix}"
                )
            content_preview = self._compact_json(content, max_chars=420)
            return (
                f"Claude tool result: {tool_name} status={status}"
                f"{input_fragment} content={content_preview}{session_suffix}"
            )
        if event_type == "text_block":
            preview = self._shorten_text(payload.get("preview"))
            return f"Claude says: {preview}{session_suffix}"
        if event_type == "thinking_block":
            chars = payload.get("chars", 0)
            return f"Claude thinking block: chars={chars}{session_suffix}"
        if event_type == "result_message":
            subtype = payload.get("subtype", "unknown")
            turns = payload.get("num_turns", 0)
            duration_ms = payload.get("duration_ms")
            duration_fragment = f" duration_ms={duration_ms}" if duration_ms is not None else ""
            return f"Claude result message: subtype={subtype} turns={turns}{duration_fragment}{session_suffix}"
        # Unknown event type: dump the payload as compact JSON.
        payload_json = self._compact_json(payload)
        return f"Claude event: type={event_type} payload={payload_json}{session_suffix}"

    @staticmethod
    def _shorten_text(value: Any, max_chars: int = 220) -> str:
        """Flatten *value* to a single line, truncated to *max_chars* with an ellipsis."""
        text = str(value) if value is not None else ""
        text = text.strip().replace("\n", " ")
        if len(text) <= max_chars:
            return text
        return f"{text[:max_chars]}..."

    @staticmethod
    def _compact_json(value: Any, max_chars: int = 300) -> str:
        """Render *value* as compact JSON (truncated); fall back to str() on failure."""
        with contextlib.suppress(TypeError, ValueError):
            rendered = json.dumps(value, sort_keys=True, default=str)
            if len(rendered) <= max_chars:
                return rendered
            return f"{rendered[:max_chars]}..."
        return ObservabilityManager._shorten_text(value, max_chars=max_chars)

    @staticmethod
    def _is_noisy_tool_name(tool_name: str) -> bool:
        """Return True for high-volume read-only tools whose successes are not logged."""
        return tool_name.lower() in {"read", "bash", "grep", "glob", "find", "ls"}

    @classmethod
    def _summarize_tool_input(cls, tool_name: str, tool_input: Any) -> str:
        """Produce a short "key=value" summary of a tool's input for log lines."""
        if not isinstance(tool_input, dict):
            return f"input={cls._compact_json(tool_input, max_chars=140)}"
        normalized_name = tool_name.lower()
        if normalized_name == "read":
            path = tool_input.get("file_path") or tool_input.get("path")
            return f"path={cls._shorten_path(path, max_chars=120)}"
        if normalized_name == "bash":
            cmd = tool_input.get("command")
            compact_cmd = cls._abbreviate_workspace_paths(cmd)
            return f"command={cls._shorten_text(compact_cmd, max_chars=160)}"
        description = tool_input.get("description")
        if isinstance(description, str) and description.strip():
            return f"description={cls._shorten_text(description, max_chars=140)}"
        # Generic fallback: pick the most informative well-known keys.
        summary_keys = ("file_path", "path", "pattern", "query", "command", "name")
        summary: dict[str, Any] = {}
        for key in summary_keys:
            if key in tool_input:
                value = tool_input[key]
                if key in {"file_path", "path"}:
                    value = cls._shorten_path(value, max_chars=120)
                summary[key] = value
        if summary:
            return f"input={cls._compact_json(summary, max_chars=160)}"
        return f"input={cls._compact_json(tool_input, max_chars=160)}"

    @classmethod
    def _shorten_path(cls, value: Any, max_chars: int = 120) -> str:
        """Rewrite *value* relative to the current working directory, then truncate."""
        text = str(value).strip() if value is not None else ""
        if not text:
            return ""
        normalized = text
        with contextlib.suppress(Exception):
            cwd = os.path.abspath(os.getcwd())
            if os.path.isabs(text):
                abs_path = os.path.abspath(text)
                if abs_path == cwd:
                    normalized = "."
                elif abs_path.startswith(f"{cwd}{os.sep}"):
                    normalized = os.path.relpath(abs_path, cwd)
                else:
                    normalized = text.replace(f"{cwd}{os.sep}", "")
        return cls._shorten_text(normalized, max_chars=max_chars)

    @staticmethod
    def _abbreviate_workspace_paths(value: Any) -> str:
        """Strip absolute cwd prefixes from *value* to keep log lines short."""
        text = str(value).strip() if value is not None else ""
        if not text:
            return ""
        compact = text
        with contextlib.suppress(Exception):
            cwd = os.path.abspath(os.getcwd())
            compact = compact.replace(f"{cwd}{os.sep}", "")
            compact = compact.replace(cwd, ".")
        return compact

    @classmethod
    def _estimate_chars(cls, value: Any) -> int:
        """Estimate the character length of *value* as it would appear serialized."""
        if value is None:
            return 0
        if isinstance(value, str):
            return len(value)
        with contextlib.suppress(TypeError, ValueError):
            return len(json.dumps(value, default=str))
        return len(str(value))

    # ------------------------------------------------------------------
    # Metrics
    # ------------------------------------------------------------------
    def get_metrics(self) -> dict:
        """Return accumulated metrics (total tokens, traces, errors)."""
        return {
            "total_tokens": self._metrics["total_tokens"],
            "total_traces": self._metrics["total_traces"],
            "total_errors": self._metrics["total_errors"],
            "total_claude_events": self._metrics["total_claude_events"],
            "total_tool_calls": self._metrics["total_tool_calls"],
            "per_agent": dict(self._metrics["per_agent"]),
        }

View File

@@ -0,0 +1,230 @@
"""Workspace Manager - Handles git worktrees and Docker containers for isolated execution."""
import os
import shutil
from pathlib import Path
import docker
import git
class WorkspaceError(Exception):
    """Base exception for workspace operations.

    Catch this to handle any workspace failure uniformly.
    """
class GitWorktreeError(WorkspaceError):
    """Exception for git worktree failures (creation, validation, removal)."""
class DockerProvisionError(WorkspaceError):
    """Exception for Docker provisioning failures (daemon connection, image pull, container create)."""
class WorkspaceManager:
    """Manages git worktrees and Docker containers for isolated Dev Agent execution.

    Each task gets a dedicated git worktree (on its own feature branch) that
    is bind-mounted into a network-isolated Docker container. Active
    workspaces are tracked in ``self.active_workspaces`` keyed by task id.
    """

    def __init__(self, repo_path: str, docker_image: str = "python:3.11-slim"):
        """Initialize WorkspaceManager.

        Args:
            repo_path: Path to the git repository.
            docker_image: Docker image to use for clean room containers.

        Raises:
            GitWorktreeError: If *repo_path* is missing or not a git repo.
            DockerProvisionError: If the Docker daemon is unreachable.
        """
        try:
            self.repo = git.Repo(repo_path)
        except git.InvalidGitRepositoryError as e:
            raise GitWorktreeError(f"Invalid git repository: {repo_path}") from e
        except git.NoSuchPathError as e:
            raise GitWorktreeError(f"Repository path not found: {repo_path}") from e
        self.repo_path = Path(repo_path).resolve()
        self.docker_image = docker_image
        # task_id -> {task_id, worktree_path, container_id, container}
        self.active_workspaces: dict[str, dict] = {}
        try:
            self.docker_client = docker.from_env()
        except docker.errors.DockerException as e:
            raise DockerProvisionError(
                "Failed to connect to Docker daemon. Is Docker running?"
            ) from e

    async def create_worktree(self, task_id: str, base_branch: str = "main") -> str:
        """Create a git worktree for a task.

        The worktree is placed in a sibling ``worktrees/<task_id>`` directory
        next to the repository, on a new ``feature/task-<task_id>`` branch.

        Args:
            task_id: Unique identifier for the task.
            base_branch: Branch to base the worktree on.

        Returns:
            Absolute path to the created worktree.

        Raises:
            GitWorktreeError: If worktree creation fails.
        """
        branch_name = f"feature/task-{task_id}"
        worktree_path = str(self.repo_path.parent / "worktrees" / task_id)
        # Validate base branch exists
        try:
            self.repo.git.rev_parse("--verify", base_branch)
        except git.GitCommandError as e:
            raise GitWorktreeError(
                f"Base branch '{base_branch}' does not exist"
            ) from e
        # Check if worktree path already exists
        if os.path.exists(worktree_path):
            raise GitWorktreeError(
                f"Worktree path already exists: {worktree_path}"
            )
        # Check if branch already exists
        if branch_name in [ref.name for ref in self.repo.branches]:
            raise GitWorktreeError(
                f"Branch already exists: {branch_name}"
            )
        try:
            os.makedirs(os.path.dirname(worktree_path), exist_ok=True)
            # "worktree add <path> -b <branch> <base>" creates branch + checkout.
            self.repo.git.worktree(
                "add", worktree_path, "-b", branch_name, base_branch
            )
        except git.GitCommandError as e:
            raise GitWorktreeError(
                f"Failed to create worktree for task {task_id}: {e}"
            ) from e
        return str(Path(worktree_path).resolve())

    async def spin_up_clean_room(self, worktree_path: str, task_id: str):
        """Create an isolated Docker container for a task.

        The worktree is bind-mounted read-write at /workspace and the
        container is created with no network access; it idles on
        ``sleep infinity`` until commands are exec'd into it.

        Args:
            worktree_path: Path to the git worktree to mount.
            task_id: Unique identifier for the task.

        Returns:
            Container object with metadata.

        Raises:
            DockerProvisionError: If container creation fails.
        """
        try:
            # NOTE(review): pulls on every call even if the image is cached
            # locally -- confirm this is intended (it requires registry access).
            self.docker_client.images.pull(self.docker_image)
        except docker.errors.APIError as e:
            raise DockerProvisionError(
                f"Failed to pull image '{self.docker_image}': {e}"
            ) from e
        try:
            container = self.docker_client.containers.create(
                image=self.docker_image,
                name=f"appfactory-task-{task_id}",
                volumes={
                    worktree_path: {"bind": "/workspace", "mode": "rw"}
                },
                working_dir="/workspace",
                network_mode="none",  # full network isolation for the clean room
                auto_remove=False,
                detach=True,
                command="sleep infinity",  # keep the container alive for exec
            )
        except docker.errors.APIError as e:
            raise DockerProvisionError(
                f"Failed to create container for task {task_id}: {e}"
            ) from e
        self.active_workspaces[task_id] = {
            "task_id": task_id,
            "worktree_path": worktree_path,
            "container_id": container.id,
            "container": container,
        }
        return container

    async def cleanup_workspace(self, task_id: str, container=None):
        """Clean up a workspace by removing its container and worktree.

        Best-effort: each cleanup step is attempted even if earlier steps
        fail; accumulated failures are reported in a single WorkspaceError
        at the end.

        Args:
            task_id: Unique identifier for the task.
            container: Optional container object. If None, uses the registered one.

        Raises:
            WorkspaceError: If cleanup fails completely.
        """
        workspace = self.active_workspaces.get(task_id, {})
        errors = []
        # Resolve container
        if container is None:
            container = workspace.get("container")
        # Stop and remove container
        if container is not None:
            try:
                container.stop(timeout=5)
            except Exception:
                pass  # Container may already be stopped
            try:
                container.remove(force=True)
            except Exception as e:
                errors.append(f"Container removal failed: {e}")
        # Remove worktree
        worktree_path = workspace.get("worktree_path")
        if worktree_path is None:
            # Fall back to the conventional location used by create_worktree.
            worktree_path = str(self.repo_path.parent / "worktrees" / task_id)
        try:
            self.repo.git.worktree("remove", worktree_path, "--force")
        except git.GitCommandError:
            # Worktree may already be removed; try cleaning up the directory
            if os.path.exists(worktree_path):
                try:
                    shutil.rmtree(worktree_path)
                except OSError as e:
                    errors.append(f"Worktree directory removal failed: {e}")
        # Prune worktree references
        try:
            self.repo.git.worktree("prune")
        except git.GitCommandError:
            pass
        # Remove from registry
        self.active_workspaces.pop(task_id, None)
        if errors:
            raise WorkspaceError(
                f"Cleanup completed with errors for task {task_id}: {'; '.join(errors)}"
            )

    def get_active_workspaces(self) -> list:
        """Return list of active workspace info dicts.

        Returns:
            List of dicts with task_id, worktree_path, and container_id.
        """
        return [
            {
                "task_id": info["task_id"],
                "worktree_path": info["worktree_path"],
                "container_id": info["container_id"],
            }
            for info in self.active_workspaces.values()
        ]

    async def cleanup_all(self):
        """Cleanup all active workspaces. Used for graceful shutdown.

        Raises:
            WorkspaceError: Aggregated if any individual cleanup failed.
        """
        task_ids = list(self.active_workspaces.keys())
        errors = []
        for task_id in task_ids:
            try:
                await self.cleanup_workspace(task_id)
            except WorkspaceError as e:
                errors.append(str(e))
        if errors:
            raise WorkspaceError(
                f"Cleanup all completed with errors: {'; '.join(errors)}"
            )

View File

@@ -0,0 +1 @@
"""Data models and schemas for App Factory."""

View File

@@ -0,0 +1,15 @@
{
"user_input": "please review the project in the app_factory directory and create an api middleware to sit between the app_factory and the ui. if the core app does not have the required features, they will be added according to the middleware spec. the middleware should support sending + receiving data (start/stop jobs, respond to pm questions, etc), tracking progress, errors, logs, etc. visualizing the graph, tracking multiple projects running at a time",
"prd": "",
"tasks": [],
"active_tasks": {},
"completed_tasks": [],
"blocked_tasks": {},
"clarification_requests": [],
"global_architecture": "",
"iteration_count": 0,
"max_iterations": 50,
"errors": [
"PM agent error: Claude SDK query failed: Command failed with exit code -9 (exit code: -9)\nError output: Check stderr output for details\nHint: verify Claude auth is available (ANTHROPIC_API_KEY or ANTHROPIC_AUTH_TOKEN, or a valid Claude Code OAuth session) and that the process can write ~/.claude and ~/.claude.json."
]
}

View File

@@ -0,0 +1 @@
"""Prompt templates and management for App Factory agents."""

View File

@@ -0,0 +1,22 @@
You are a Dev Agent working on a specific task in an automated software factory.
## YOUR TASK
- Task ID: {task_id}
- Title: {title}
- Description: {description}
## DETAILED REQUIREMENTS
{details}
## TEST STRATEGY
{test_strategy}
## GLOBAL ARCHITECTURE (Read-Only Context)
{global_architecture}
## STRICT INSTRUCTIONS
1. Implement ONLY this task. Do not make changes unrelated to this task.
2. Follow existing code patterns and conventions from the architecture summary.
3. Create or update test files as specified in the test strategy.
4. All tests must pass before you consider the task complete.
5. Do not modify files outside the scope of this task.

View File

@@ -0,0 +1,11 @@
You are a Product Manager resolving a clarification request from a downstream agent.
Agent: {requesting_agent}
Task ID: {task_id}
Question: {question}
Context: {context}
If you can answer this question based on the PRD and general best practices, provide a clear, specific answer.
If the question requires human input (business decision, external dependency, or ambiguous requirement), respond with exactly: ESCALATE_TO_HUMAN
Provide only the answer, no preamble.

View File

@@ -0,0 +1,12 @@
You are an expert Product Manager. Analyze the user's project description and expand it into a comprehensive Product Requirements Document (PRD).
Your PRD must include these sections:
1. **Objective** - Clear project goal and vision
2. **Core Requirements** - Detailed functional requirements (numbered list)
3. **Technical Architecture** - System design, components, data flow
4. **Tech Stack** - Languages, frameworks, databases, infrastructure
5. **Success Criteria** - Measurable outcomes for project completion
6. **Non-Functional Requirements** - Performance, security, scalability constraints
Be specific and actionable. Include edge cases and error handling requirements.
Fill in reasonable technical decisions where the user hasn't specified.

View File

@@ -0,0 +1,20 @@
You are a QA code reviewer in an automated software factory. Review the following code changes for quality and security.
## Task Context
{task_context}
## Code Diff
{diff}
## Review Checklist
1. **Security**: Check for OWASP Top 10 vulnerabilities (SQL injection, XSS, command injection, path traversal)
2. **Code Quality**: Proper error handling, no dead code, clear naming, appropriate abstractions
3. **Task Adherence**: Changes match the task requirements, no scope creep
4. **Testing**: Adequate test coverage for the changes
5. **Potential Bugs**: Race conditions, edge cases, null/None handling
Respond in this format:
APPROVED: true/false
ISSUES:
- [severity: critical/warning/info] description
SUMMARY: One sentence summary of review