first commit
This commit is contained in:
8
app_factory/agents/__init__.py
Normal file
8
app_factory/agents/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""Agent modules for the App Factory orchestration framework."""
|
||||
|
||||
from app_factory.agents.pm_agent import PMAgent
|
||||
from app_factory.agents.task_agent import TaskMasterAgent
|
||||
from app_factory.agents.dev_agent import DevAgentManager
|
||||
from app_factory.agents.qa_agent import QAAgent
|
||||
|
||||
__all__ = ["PMAgent", "TaskMasterAgent", "DevAgentManager", "QAAgent"]
|
||||
205
app_factory/agents/dev_agent.py
Normal file
205
app_factory/agents/dev_agent.py
Normal file
@@ -0,0 +1,205 @@
|
||||
"""Dev Agent Manager - Spawns Dev Agents in Docker containers via Claude Code."""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pexpect
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
PROMPT_TEMPLATE_PATH = Path(__file__).resolve().parent.parent / "prompts" / "dev_task_execution.txt"
|
||||
|
||||
|
||||
class DevAgentManager:
    """Spawns Dev Agents in Docker containers, interfaces with Claude Code via pexpect."""

    def __init__(self, docker_client=None, max_retries: int = 3, timeout: int = 1800):
        """Initialize DevAgentManager.

        Args:
            docker_client: Docker client instance (or None to create from env).
            max_retries: Maximum Dev-QA bounce retries per task.
            timeout: Timeout in seconds for Claude Code execution (default 30 min).
        """
        if docker_client is not None:
            self.docker_client = docker_client
        else:
            # Lazy import so callers injecting a fake client don't need the
            # docker package installed.
            import docker
            self.docker_client = docker.from_env()

        self.max_retries = max_retries
        self.timeout = timeout
        # Maps task_id -> attempt number (1-based) of the current retry cycle.
        self._retry_counts: dict[str, int] = {}

    def prepare_task_prompt(self, task: dict, global_arch: str = "") -> str:
        """Build a prompt string for the Dev Agent from the template.

        Args:
            task: Task dict with keys task_id, title, description, details, testStrategy.
            global_arch: Optional global architecture summary.

        Returns:
            Formatted prompt string.
        """
        template = PROMPT_TEMPLATE_PATH.read_text()
        return template.format(
            # Tasks may carry either "task_id" or "id" depending on the producer.
            task_id=task.get("task_id", task.get("id", "")),
            title=task.get("title", ""),
            description=task.get("description", ""),
            details=task.get("details", ""),
            test_strategy=task.get("testStrategy", ""),
            global_architecture=global_arch or "No architecture context provided.",
        )

    async def execute_task(
        self,
        task: dict,
        container_id: str,
        worktree_path: str,
        global_arch: str = "",
    ) -> dict:
        """Execute a task inside a Docker container using Claude Code.

        Args:
            task: Task dict.
            container_id: Docker container ID to exec into.
            worktree_path: Host path to the worktree (mounted at /workspace).
            global_arch: Optional architecture context.

        Returns:
            Dict with status ("success" or "failed"), output, files_changed,
            and exit_code (-1 on timeout or unknown exit status).
        """
        prompt = self.prepare_task_prompt(task, global_arch)

        # Write prompt to temp file in worktree so it's visible inside the container
        prompt_file = os.path.join(worktree_path, ".task_prompt.txt")
        with open(prompt_file, "w") as f:
            f.write(prompt)

        # NOTE(review): container_id is interpolated directly into the command;
        # assumed to come from our own Docker client, not untrusted input.
        cmd = f"docker exec {container_id} claude --print --prompt-file /workspace/.task_prompt.txt"

        try:
            child = pexpect.spawn(cmd, timeout=self.timeout, encoding="utf-8")
            # Wait for the process to finish (EOF on its output stream).
            child.expect(pexpect.EOF, timeout=self.timeout)
            output = child.before or ""
            child.close()
            exit_code = child.exitstatus if child.exitstatus is not None else -1
        except pexpect.TIMEOUT:
            try:
                child.close(force=True)
            except Exception:
                pass
            return {
                "status": "failed",
                "output": "timeout",
                "files_changed": [],
                "exit_code": -1,
            }
        finally:
            # Clean up prompt file
            try:
                os.remove(prompt_file)
            except OSError:
                pass

        parsed = self.parse_claude_output(output)

        if exit_code == 0:
            status = "success"
        else:
            status = "failed"

        return {
            "status": status,
            "output": output,
            "files_changed": parsed["files_changed"],
            "exit_code": exit_code,
        }

    def parse_claude_output(self, output: str) -> dict:
        """Parse Claude Code output to extract structured info.

        Args:
            output: Raw stdout from Claude Code.

        Returns:
            Dict with files_changed, test_results, and errors.
        """
        # Extract file paths (common patterns: Created/Modified/Updated path/to/file.py)
        file_patterns = re.findall(
            r"(?:(?:Creat|Modifi|Updat|Edit|Writ)(?:ed|ing)\s+)([^\s]+\.\w+)",
            output,
        )
        # Also catch paths that look like source files mentioned standalone
        standalone_paths = re.findall(
            r"(?:^|\s)([\w./]+\.(?:py|js|ts|yaml|yml|json|txt|md|toml|cfg))\b",
            output,
        )
        all_files = list(dict.fromkeys(file_patterns + standalone_paths))  # dedupe, preserve order

        # Extract test results (pytest-style "N passed" / "N failed" counts)
        test_results = {}
        passed_match = re.search(r"(\d+)\s+passed", output)
        failed_match = re.search(r"(\d+)\s+failed", output)
        if passed_match:
            test_results["passed"] = int(passed_match.group(1))
        if failed_match:
            test_results["failed"] = int(failed_match.group(1))

        # Extract error messages
        errors = re.findall(r"(?:Error|Exception|FAILED)[:\s]+(.*?)(?:\n|$)", output, re.IGNORECASE)

        return {
            "files_changed": all_files,
            "test_results": test_results,
            "errors": errors,
        }

    async def execute_with_retry(
        self,
        task: dict,
        container_id: str,
        worktree_path: str,
        global_arch: str = "",
    ) -> dict:
        """Execute a task with retry logic.

        Retries up to max_retries times on failure. If all retries are exhausted,
        returns a result with status 'needs_clarification'.

        Args:
            task: Task dict.
            container_id: Docker container ID.
            worktree_path: Host worktree path.
            global_arch: Optional architecture context.

        Returns:
            Final execution result dict.
        """
        task_id = str(task.get("task_id", task.get("id", "")))

        # Fallback result: if max_retries <= 0 the loop below never runs, and
        # without this initialization `result` would be unbound (NameError).
        result: dict = {"status": "failed", "output": "", "files_changed": [], "exit_code": -1}

        for attempt in range(self.max_retries):
            self._retry_counts[task_id] = attempt + 1
            result = await self.execute_task(task, container_id, worktree_path, global_arch)
            if result["status"] == "success":
                return result

        # All retries exhausted (or none configured) — escalate to clarification.
        return {
            "status": "needs_clarification",
            "output": result.get("output", ""),
            "files_changed": result.get("files_changed", []),
            "exit_code": result.get("exit_code", -1),
        }

    def get_retry_count(self, task_id: str) -> int:
        """Return current retry count for a task."""
        return self._retry_counts.get(task_id, 0)

    def reset_retry_count(self, task_id: str):
        """Reset retry counter for a task (after clarification resolved)."""
        self._retry_counts.pop(task_id, None)
|
||||
136
app_factory/agents/pm_agent.py
Normal file
136
app_factory/agents/pm_agent.py
Normal file
@@ -0,0 +1,136 @@
|
||||
"""Project Manager Agent - Expands user prompts into structured PRDs and handles clarification requests."""
|
||||
|
||||
import os
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from app_factory.core.claude_client import ClaudeSDKClient
|
||||
|
||||
|
||||
class PMAgent:
    """Agent responsible for PRD generation, clarification handling, and project planning."""

    def __init__(
        self,
        api_key: str = None,
        auth_token: str = None,
        model: str = "claude-opus-4-6",
        debug: bool = False,
        observability=None,
    ):
        """Initialize PMAgent.

        Args:
            api_key: Anthropic API key. Falls back to ANTHROPIC_API_KEY env var.
            auth_token: Auth token. Falls back to ANTHROPIC_AUTH_TOKEN env var.
            model: Claude model identifier used for all PM completions.
            debug: Enable debug output in the underlying SDK client.
            observability: Optional sink with a log_token_usage() method.
        """
        self.model = model
        # Cumulative token counters across all completions made by this agent.
        self.input_tokens = 0
        self.output_tokens = 0
        self._prompts_dir = Path(__file__).resolve().parent.parent / "prompts"
        self.observability = observability

        resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        resolved_auth = auth_token or os.environ.get("ANTHROPIC_AUTH_TOKEN")
        self.client = ClaudeSDKClient(
            api_key=resolved_key,
            auth_token=resolved_auth,
            enable_debug=debug,
        )

    def _load_template(self, template_name: str) -> str:
        """Load a prompt template file from app_factory/prompts/."""
        path = self._prompts_dir / template_name
        return path.read_text()

    def _record_usage(self, task_id: str, response):
        """Accumulate token counters and forward usage to observability, if configured.

        Previously duplicated verbatim in both completion methods; factored out
        so the accounting cannot drift between them.

        Args:
            task_id: Identifier to tag the usage record with.
            response: SDK response object exposing input_tokens/output_tokens.
        """
        self.input_tokens += response.input_tokens
        self.output_tokens += response.output_tokens
        if self.observability:
            self.observability.log_token_usage(
                "pm_agent",
                task_id,
                input_tokens=response.input_tokens,
                output_tokens=response.output_tokens,
                model=self.model,
            )

    async def expand_prompt_to_prd(self, user_input: str) -> str:
        """Expand a user prompt into a structured PRD using Claude.

        Returns markdown with sections: Objective, Core Requirements,
        Technical Architecture, Tech Stack, Success Criteria, Non-Functional Requirements.
        """
        system_prompt = self._load_template("pm_prd_expansion.txt")

        response = await self.client.complete(
            prompt=user_input,
            model=self.model,
            system_prompt=system_prompt,
            max_turns=100,
            observability=self.observability,
            agent_name="pm_agent",
            task_id="expand_prd",
        )

        self._record_usage("expand_prd", response)

        return response.text

    async def handle_clarification_request(self, clarification: dict) -> str:
        """Handle a clarification request from a downstream agent.

        Args:
            clarification: dict with keys requesting_agent, task_id, question, context.

        Returns:
            Clarification response string. If the question requires human input,
            prompts the user and returns their answer.
        """
        template = self._load_template("pm_clarification.txt")
        prompt = template.format(
            requesting_agent=clarification.get("requesting_agent", "unknown"),
            task_id=clarification.get("task_id", "N/A"),
            question=clarification.get("question", ""),
            context=clarification.get("context", ""),
        )

        response = await self.client.complete(
            prompt=prompt,
            model=self.model,
            max_turns=100,
            observability=self.observability,
            agent_name="pm_agent",
            task_id=f"clarification:{clarification.get('task_id', 'N/A')}",
        )

        self._record_usage(f"clarification:{clarification.get('task_id', 'N/A')}", response)

        answer = response.text.strip()

        # The model signals it cannot answer on its own with this sentinel;
        # fall back to asking the human operator on stdin.
        if "ESCALATE_TO_HUMAN" in answer:
            human_answer = input(
                f"[PMAgent] Clarification needed for {clarification.get('requesting_agent', 'agent')} "
                f"(task {clarification.get('task_id', 'N/A')}): "
                f"{clarification.get('question', '')}\n> "
            )
            return human_answer

        return answer

    def update_prd(self, prd_path: str, updates: str):
        """Append updates to an existing PRD file with a versioned header."""
        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        header = f"\n\n---\n## PRD Update - {timestamp}\n\n"

        with open(prd_path, "a") as f:
            f.write(header)
            f.write(updates)

    def get_token_usage(self) -> dict:
        """Return cumulative token usage."""
        return {
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "total_tokens": self.input_tokens + self.output_tokens,
        }
|
||||
383
app_factory/agents/qa_agent.py
Normal file
383
app_factory/agents/qa_agent.py
Normal file
@@ -0,0 +1,383 @@
|
||||
"""QA Agent - Handles code review, testing, linting, and merge operations."""
|
||||
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import git
|
||||
|
||||
from app_factory.core.claude_client import ClaudeSDKClient
|
||||
|
||||
|
||||
class QAAgent:
    """Reviews code, runs tests, handles merge conflicts, merges worktrees to main."""

    def __init__(
        self,
        repo_path: str,
        api_key: str = None,
        auth_token: str = None,
        max_retries: int = 3,
        debug: bool = False,
        observability=None,
    ):
        """Initialize QAAgent.

        Args:
            repo_path: Path to the git repository.
            api_key: Optional API key. Falls back to ANTHROPIC_API_KEY env var.
            auth_token: Optional auth token. Falls back to ANTHROPIC_AUTH_TOKEN env var.
            max_retries: Maximum QA-Dev bounce retries per task.
            debug: Enable debug output in the underlying SDK client.
            observability: Optional sink with a log_token_usage() method.
        """
        self.repo = git.Repo(repo_path)
        self.repo_path = Path(repo_path).resolve()
        self.max_retries = max_retries
        # task_id -> number of QA failures recorded for that task.
        self._retry_counts: dict[str, int] = {}
        self._prompts_dir = Path(__file__).resolve().parent.parent / "prompts"
        self.observability = observability

        resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        resolved_auth = auth_token or os.environ.get("ANTHROPIC_AUTH_TOKEN")
        self.client = ClaudeSDKClient(
            api_key=resolved_key,
            auth_token=resolved_auth,
            enable_debug=debug,
        )

    async def review_and_merge(self, task_id: str, worktree_path: str, task: dict = None) -> dict:
        """Full QA pipeline: rebase, lint, test, review, merge.

        Returns:
            dict with status and details. Status is one of:
            'merged', 'rebase_failed', 'lint_failed', 'tests_failed', 'review_failed'.
        """
        # 1. Rebase feature branch onto main
        rebase_result = await self.rebase_onto_main(worktree_path, task_id)
        if not rebase_result["success"]:
            self._increment_retry(task_id)
            return {
                "status": "rebase_failed",
                "conflicts": rebase_result.get("conflicts", []),
                "retry_count": self.get_retry_count(task_id),
            }

        # 2. Run linting
        lint_result = self.run_linter(worktree_path)
        if not lint_result["passed"]:
            self._increment_retry(task_id)
            return {
                "status": "lint_failed",
                "errors": lint_result["errors"],
                "warnings": lint_result["warnings"],
                "retry_count": self.get_retry_count(task_id),
            }

        # 3. Run tests
        test_result = self.run_tests(worktree_path)
        if not test_result["passed"]:
            self._increment_retry(task_id)
            return {
                "status": "tests_failed",
                "total": test_result["total"],
                "failures": test_result["failures"],
                "errors": test_result["errors"],
                "output": test_result["output"],
                "retry_count": self.get_retry_count(task_id),
            }

        # 4. Code review via Claude
        wt_repo = git.Repo(worktree_path)
        diff = wt_repo.git.diff("main", "--", ".")
        review_result = await self.code_review(diff, task=task)
        if not review_result["approved"]:
            self._increment_retry(task_id)
            return {
                "status": "review_failed",
                "issues": review_result["issues"],
                "summary": review_result["summary"],
                "retry_count": self.get_retry_count(task_id),
            }

        # 5. Merge to main
        merge_result = self.merge_to_main(worktree_path, task_id)
        if not merge_result["success"]:
            return {
                "status": "merge_failed",
                "error": merge_result.get("error", "Unknown merge error"),
            }

        return {
            "status": "merged",
            "commit_sha": merge_result["commit_sha"],
            "review_summary": review_result["summary"],
        }

    async def rebase_onto_main(self, worktree_path: str, task_id: str) -> dict:
        """Rebase the feature branch in the worktree onto main.

        Returns:
            dict with success bool and conflicts list.
        """
        wt_repo = git.Repo(worktree_path)
        try:
            wt_repo.git.fetch("origin", "main")
        except git.GitCommandError:
            pass  # fetch may fail in local-only repos; continue with local main

        try:
            wt_repo.git.rebase("main")
            return {"success": True, "conflicts": []}
        except git.GitCommandError:
            # Rebase failed — check for conflicts
            conflicts = self._get_conflict_files(wt_repo)
            if conflicts and self.auto_resolve_conflicts(worktree_path):
                return {"success": True, "conflicts": []}
            # Abort the failed rebase so the worktree is left in a clean state
            try:
                wt_repo.git.rebase("--abort")
            except git.GitCommandError:
                pass
            return {"success": False, "conflicts": conflicts}

    def run_linter(self, worktree_path: str) -> dict:
        """Run ruff linter on the worktree.

        Returns:
            dict with passed bool, errors list, and warnings list.
        """
        try:
            result = subprocess.run(
                ["ruff", "check", "."],
                cwd=worktree_path,
                capture_output=True,
                text=True,
                timeout=120,
            )
        except FileNotFoundError:
            # Linting is best-effort: a missing ruff binary must not block QA.
            return {"passed": True, "errors": [], "warnings": ["ruff not found, skipping lint"]}
        except subprocess.TimeoutExpired:
            return {"passed": False, "errors": ["Linter timed out"], "warnings": []}

        errors = []
        warnings = []
        for line in result.stdout.splitlines():
            line = line.strip()
            if not line or line.startswith("Found") or line.startswith("All checks"):
                continue
            # ruff output lines contain error codes like E501, W291, etc.
            if re.search(r"\b[A-Z]\d{3,4}\b", line):
                errors.append(line)
            elif line:
                warnings.append(line)

        passed = result.returncode == 0
        return {"passed": passed, "errors": errors, "warnings": warnings}

    def run_tests(self, worktree_path: str) -> dict:
        """Run pytest in the worktree.

        Returns:
            dict with passed bool, total/failures/errors counts, and raw output.
        """
        try:
            result = subprocess.run(
                ["python", "-m", "pytest", "-v", "--tb=short"],
                cwd=worktree_path,
                capture_output=True,
                text=True,
                timeout=300,
            )
        except FileNotFoundError:
            return {"passed": False, "total": 0, "failures": 0, "errors": 1,
                    "output": "pytest not found"}
        except subprocess.TimeoutExpired:
            return {"passed": False, "total": 0, "failures": 0, "errors": 1,
                    "output": "Test execution timed out"}

        output = result.stdout + result.stderr
        parsed = self.parse_test_results(output)
        parsed["output"] = output
        return parsed

    async def code_review(self, diff: str, task: dict = None) -> dict:
        """Review a diff using Claude for quality and security issues.

        Returns:
            dict with approved bool, issues list, and summary string.
        """
        template = self._load_template("qa_review.txt")
        task_context = ""
        if task:
            task_context = (
                f"Task ID: {task.get('id', 'N/A')}\n"
                f"Title: {task.get('title', 'N/A')}\n"
                f"Description: {task.get('description', 'N/A')}"
            )

        prompt = template.format(task_context=task_context, diff=diff)

        response = await self.client.complete(
            prompt=prompt,
            model="claude-sonnet-4-6",
            max_turns=100,
            observability=self.observability,
            agent_name="qa_agent",
            task_id=str(task.get("id", task.get("task_id", "review"))) if task else "review",
        )
        if self.observability:
            self.observability.log_token_usage(
                "qa_agent",
                str(task.get("id", task.get("task_id", "review"))) if task else "review",
                input_tokens=response.input_tokens,
                output_tokens=response.output_tokens,
                model="claude-sonnet-4-6",
            )

        text = response.text
        return self._parse_review_response(text)

    def merge_to_main(self, worktree_path: str, task_id: str) -> dict:
        """Merge the feature branch into main with --no-ff.

        Returns:
            dict with success bool and commit_sha.
        """
        # Branch naming convention is fixed by the orchestrator.
        branch_name = f"feature/task-{task_id}"
        try:
            self.repo.git.checkout("main")
            self.repo.git.merge("--no-ff", branch_name, m=f"Merge {branch_name}")
            commit_sha = self.repo.head.commit.hexsha
            return {"success": True, "commit_sha": commit_sha}
        except git.GitCommandError as e:
            return {"success": False, "commit_sha": None, "error": str(e)}

    def auto_resolve_conflicts(self, worktree_path: str) -> bool:
        """Try to auto-resolve simple merge conflicts.

        Returns True if all conflicts were resolved.
        """
        wt_repo = git.Repo(worktree_path)
        unmerged = wt_repo.index.unmerged_blobs()
        if not unmerged:
            return True

        for path in unmerged:
            file_path = os.path.join(worktree_path, path)
            if not os.path.exists(file_path):
                continue
            try:
                with open(file_path) as f:
                    content = f.read()
                # Accept "theirs" (incoming) for simple conflicts
                if "<<<<<<< " in content and "=======" in content and ">>>>>>> " in content:
                    # Keep only the section between ======= and >>>>>>> markers.
                    resolved = re.sub(
                        r"<<<<<<< [^\n]*\n.*?=======\n(.*?)>>>>>>> [^\n]*\n",
                        r"\1",
                        content,
                        flags=re.DOTALL,
                    )
                    with open(file_path, "w") as f:
                        f.write(resolved)
                    wt_repo.index.add([path])
                else:
                    # Markers missing or malformed — don't guess.
                    return False
            except Exception:
                return False

        try:
            wt_repo.git.rebase("--continue")
            return True
        except git.GitCommandError:
            return False

    def parse_test_results(self, output: str) -> dict:
        """Parse pytest output into structured results.

        Returns:
            dict with passed bool, total int, failures int, errors int.
        """
        passed_count = 0
        failed_count = 0
        error_count = 0

        # Pytest emits several "=== ... ===" banner lines: "test session starts"
        # first, the final summary with the counts last. Using re.search here
        # would match the FIRST banner and always report zero tests, so collect
        # all banners and parse the LAST one.
        banner_lines = re.findall(
            r"=+\s*(.*?)\s*=+\s*$",
            output,
            re.MULTILINE,
        )
        if banner_lines:
            summary_line = banner_lines[-1]
            p = re.search(r"(\d+)\s+passed", summary_line)
            f = re.search(r"(\d+)\s+failed", summary_line)
            e = re.search(r"(\d+)\s+error", summary_line)
            if p:
                passed_count = int(p.group(1))
            if f:
                failed_count = int(f.group(1))
            if e:
                error_count = int(e.group(1))

        total = passed_count + failed_count + error_count
        # A run that collected zero tests is treated as not passing.
        all_passed = failed_count == 0 and error_count == 0 and total > 0

        return {
            "passed": all_passed,
            "total": total,
            "failures": failed_count,
            "errors": error_count,
        }

    def get_retry_count(self, task_id: str) -> int:
        """Return QA retry count for a task."""
        return self._retry_counts.get(task_id, 0)

    def _increment_retry(self, task_id: str):
        """Increment the retry counter for a task."""
        self._retry_counts[task_id] = self._retry_counts.get(task_id, 0) + 1

    def _load_template(self, template_name: str) -> str:
        """Load a prompt template file from app_factory/prompts/."""
        path = self._prompts_dir / template_name
        return path.read_text()

    def _get_conflict_files(self, repo: "git.Repo") -> list[str]:
        """Get list of conflicting files from a repo."""
        try:
            status_output = repo.git.status("--porcelain")
            conflicts = []
            for line in status_output.splitlines():
                # UU = both modified, AA = both added — the two conflict states
                # this pipeline produces via rebase.
                if line.startswith("UU ") or line.startswith("AA "):
                    conflicts.append(line[3:].strip())
            return conflicts
        except git.GitCommandError:
            return []

    def _parse_review_response(self, text: str) -> dict:
        """Parse Claude's review response into structured data."""
        approved = False
        issues = []
        summary = ""

        for line in text.splitlines():
            line = line.strip()
            if line.upper().startswith("APPROVED:"):
                value = line.split(":", 1)[1].strip().lower()
                approved = value in ("true", "yes")
            elif line.startswith("- ["):
                # Parse issue lines like "- [severity: critical] description"
                issue_match = re.match(
                    r"-\s*\[severity:\s*(critical|warning|info)\]\s*(.*)",
                    line,
                    re.IGNORECASE,
                )
                if issue_match:
                    issues.append({
                        "severity": issue_match.group(1).lower(),
                        "description": issue_match.group(2).strip(),
                    })
            elif line.upper().startswith("SUMMARY:"):
                summary = line.split(":", 1)[1].strip()

        return {"approved": approved, "issues": issues, "summary": summary}
|
||||
180
app_factory/agents/task_agent.py
Normal file
180
app_factory/agents/task_agent.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""Task Master Agent - Bridge to claude-task-master for task graph management."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TaskMasterAgent:
|
||||
"""Bridge to claude-task-master for task graph management and dependency resolution."""
|
||||
|
||||
def __init__(self, project_root: str, mcp_client=None):
|
||||
self.project_root = str(project_root)
|
||||
self.mcp_client = mcp_client
|
||||
self.max_retries = 3
|
||||
self.base_delay = 1.0
|
||||
|
||||
async def parse_prd(self, prd_content: str, num_tasks: int = 10) -> dict:
|
||||
"""Write PRD content to disk and invoke task-master parse-prd."""
|
||||
docs_dir = Path(self.project_root) / ".taskmaster" / "docs"
|
||||
docs_dir.mkdir(parents=True, exist_ok=True)
|
||||
prd_path = docs_dir / "prd.md"
|
||||
prd_path.write_text(prd_content)
|
||||
|
||||
result = await self._call_with_retry(
|
||||
self._run_cli,
|
||||
"parse-prd",
|
||||
str(prd_path),
|
||||
"--num-tasks",
|
||||
str(num_tasks),
|
||||
"--force",
|
||||
)
|
||||
return result
|
||||
|
||||
async def get_unblocked_tasks(self) -> list:
|
||||
"""Get all pending tasks whose dependencies are all done."""
|
||||
result = await self._call_with_retry(self._run_cli, "list", "--json")
|
||||
tasks = result.get("tasks", [])
|
||||
|
||||
done_ids = {
|
||||
str(t["id"]) for t in tasks if t.get("status") == "done"
|
||||
}
|
||||
|
||||
unblocked = []
|
||||
for task in tasks:
|
||||
if task.get("status") != "pending":
|
||||
continue
|
||||
deps = [str(d) for d in task.get("dependencies", [])]
|
||||
if all(d in done_ids for d in deps):
|
||||
unblocked.append(task)
|
||||
|
||||
return unblocked
|
||||
|
||||
async def update_task_status(
|
||||
self, task_id: str, status: str, notes: str = ""
|
||||
):
|
||||
"""Update a task's status and optionally add implementation notes."""
|
||||
await self._call_with_retry(
|
||||
self._run_cli,
|
||||
"set-status",
|
||||
f"--id={task_id}",
|
||||
f"--status={status}",
|
||||
)
|
||||
if notes:
|
||||
await self._call_with_retry(
|
||||
self._run_cli,
|
||||
"update-subtask",
|
||||
f"--id={task_id}",
|
||||
f"--prompt={notes}",
|
||||
)
|
||||
|
||||
async def get_task_details(self, task_id: str) -> dict:
|
||||
"""Get full details for a specific task."""
|
||||
result = await self._call_with_retry(
|
||||
self._run_cli, "show", str(task_id), "--json"
|
||||
)
|
||||
task = result.get("task", result)
|
||||
return {
|
||||
"id": task.get("id"),
|
||||
"title": task.get("title", ""),
|
||||
"description": task.get("description", ""),
|
||||
"details": task.get("details", ""),
|
||||
"testStrategy": task.get("testStrategy", ""),
|
||||
"dependencies": task.get("dependencies", []),
|
||||
"subtasks": task.get("subtasks", []),
|
||||
"status": task.get("status", "pending"),
|
||||
"priority": task.get("priority", ""),
|
||||
}
|
||||
|
||||
async def get_next_task(self) -> dict | None:
|
||||
"""Get the highest-priority unblocked task, or None."""
|
||||
try:
|
||||
result = await self._call_with_retry(
|
||||
self._run_cli, "next", "--json"
|
||||
)
|
||||
task = result.get("task", result)
|
||||
if task and task.get("id"):
|
||||
return task
|
||||
except RuntimeError:
|
||||
logger.debug("next_task command failed, falling back to manual selection")
|
||||
|
||||
unblocked = await self.get_unblocked_tasks()
|
||||
if not unblocked:
|
||||
return None
|
||||
|
||||
priority_order = {"high": 0, "medium": 1, "low": 2}
|
||||
unblocked.sort(
|
||||
key=lambda t: (
|
||||
priority_order.get(t.get("priority", "medium"), 1),
|
||||
t.get("id", 0),
|
||||
)
|
||||
)
|
||||
return unblocked[0]
|
||||
|
||||
async def expand_task(self, task_id: str, num_subtasks: int = 5) -> dict:
|
||||
"""Break a task into subtasks."""
|
||||
result = await self._call_with_retry(
|
||||
self._run_cli,
|
||||
"expand",
|
||||
f"--id={task_id}",
|
||||
f"--num={num_subtasks}",
|
||||
"--force",
|
||||
)
|
||||
return result
|
||||
|
||||
async def _call_with_retry(self, func, *args, **kwargs):
|
||||
"""Retry with exponential backoff."""
|
||||
last_exc = None
|
||||
for attempt in range(self.max_retries):
|
||||
try:
|
||||
return await func(*args, **kwargs)
|
||||
except Exception as exc:
|
||||
last_exc = exc
|
||||
if attempt < self.max_retries - 1:
|
||||
delay = self.base_delay * (2 ** attempt)
|
||||
logger.warning(
|
||||
"Attempt %d/%d failed: %s. Retrying in %.1fs",
|
||||
attempt + 1,
|
||||
self.max_retries,
|
||||
exc,
|
||||
delay,
|
||||
)
|
||||
await asyncio.sleep(delay)
|
||||
raise RuntimeError(
|
||||
f"All {self.max_retries} attempts failed. Last error: {last_exc}"
|
||||
) from last_exc
|
||||
|
||||
async def _run_cli(self, *args: str) -> dict:
|
||||
"""Execute a task-master CLI command and return parsed JSON output."""
|
||||
cmd = ["task-master", *args]
|
||||
logger.debug("Running CLI: %s", " ".join(cmd))
|
||||
|
||||
proc = await asyncio.get_event_loop().run_in_executor(
|
||||
None,
|
||||
lambda: subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd=self.project_root,
|
||||
timeout=120,
|
||||
),
|
||||
)
|
||||
|
||||
if proc.returncode != 0:
|
||||
raise RuntimeError(
|
||||
f"task-master {args[0]} failed (rc={proc.returncode}): {proc.stderr.strip()}"
|
||||
)
|
||||
|
||||
stdout = proc.stdout.strip()
|
||||
if not stdout:
|
||||
return {}
|
||||
|
||||
try:
|
||||
return json.loads(stdout)
|
||||
except json.JSONDecodeError:
|
||||
return {"raw_output": stdout}
|
||||
Reference in New Issue
Block a user