384 lines
14 KiB
Python
384 lines
14 KiB
Python
"""QA Agent - Handles code review, testing, linting, and merge operations."""
|
|
|
|
import os
|
|
import re
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
import git
|
|
|
|
from app_factory.core.claude_client import ClaudeSDKClient
|
|
|
|
|
|
class QAAgent:
|
|
"""Reviews code, runs tests, handles merge conflicts, merges worktrees to main."""
|
|
|
|
    def __init__(
        self,
        repo_path: str,
        api_key: str = None,
        auth_token: str = None,
        max_retries: int = 3,
        debug: bool = False,
        observability=None,
    ):
        """Initialize QAAgent.

        Args:
            repo_path: Path to the git repository.
            api_key: Optional API key. Falls back to ANTHROPIC_API_KEY env var.
            auth_token: Optional auth token. Falls back to ANTHROPIC_AUTH_TOKEN env var.
            max_retries: Maximum QA-Dev bounce retries per task.
            debug: When True, enables debug output on the Claude SDK client.
            observability: Optional hook used to log token usage for review calls.
        """
        self.repo = git.Repo(repo_path)
        self.repo_path = Path(repo_path).resolve()
        self.max_retries = max_retries
        # Per-task QA failure counts, keyed by task_id.
        self._retry_counts: dict[str, int] = {}
        # Prompt templates live in app_factory/prompts/ (one level above this module's dir).
        self._prompts_dir = Path(__file__).resolve().parent.parent / "prompts"
        self.observability = observability

        # Explicit arguments win; environment variables are the fallback.
        resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        resolved_auth = auth_token or os.environ.get("ANTHROPIC_AUTH_TOKEN")
        self.client = ClaudeSDKClient(
            api_key=resolved_key,
            auth_token=resolved_auth,
            enable_debug=debug,
        )
|
|
|
|
    async def review_and_merge(self, task_id: str, worktree_path: str, task: dict = None) -> dict:
        """Full QA pipeline: rebase, lint, test, review, merge.

        Stages run in order; the first failing stage increments the task's
        retry counter and returns immediately with stage-specific details.

        Args:
            task_id: Identifier of the task under review.
            worktree_path: Path to the feature branch's git worktree.
            task: Optional task dict (id/title/description) forwarded to the reviewer.

        Returns:
            dict with status and details. Status is one of:
            'merged', 'rebase_failed', 'lint_failed', 'tests_failed',
            'review_failed', 'merge_failed'.
        """
        # 1. Rebase feature branch onto main
        rebase_result = await self.rebase_onto_main(worktree_path, task_id)
        if not rebase_result["success"]:
            self._increment_retry(task_id)
            return {
                "status": "rebase_failed",
                "conflicts": rebase_result.get("conflicts", []),
                "retry_count": self.get_retry_count(task_id),
            }

        # 2. Run linting
        lint_result = self.run_linter(worktree_path)
        if not lint_result["passed"]:
            self._increment_retry(task_id)
            return {
                "status": "lint_failed",
                "errors": lint_result["errors"],
                "warnings": lint_result["warnings"],
                "retry_count": self.get_retry_count(task_id),
            }

        # 3. Run tests
        test_result = self.run_tests(worktree_path)
        if not test_result["passed"]:
            self._increment_retry(task_id)
            return {
                "status": "tests_failed",
                "total": test_result["total"],
                "failures": test_result["failures"],
                "errors": test_result["errors"],
                "output": test_result["output"],
                "retry_count": self.get_retry_count(task_id),
            }

        # 4. Code review via Claude, on the diff between main and the branch tip.
        wt_repo = git.Repo(worktree_path)
        diff = wt_repo.git.diff("main", "--", ".")
        review_result = await self.code_review(diff, task=task)
        if not review_result["approved"]:
            self._increment_retry(task_id)
            return {
                "status": "review_failed",
                "issues": review_result["issues"],
                "summary": review_result["summary"],
                "retry_count": self.get_retry_count(task_id),
            }

        # 5. Merge to main
        # NOTE(review): unlike the earlier stages, a merge failure does not
        # bump the retry counter — confirm this asymmetry is intentional.
        merge_result = self.merge_to_main(worktree_path, task_id)
        if not merge_result["success"]:
            return {
                "status": "merge_failed",
                "error": merge_result.get("error", "Unknown merge error"),
            }

        return {
            "status": "merged",
            "commit_sha": merge_result["commit_sha"],
            "review_summary": review_result["summary"],
        }
|
|
|
|
    async def rebase_onto_main(self, worktree_path: str, task_id: str) -> dict:
        """Rebase the feature branch in the worktree onto main.

        On conflicts, attempts automatic resolution; if that fails the rebase
        is aborted so the worktree is not left mid-rebase.

        Args:
            worktree_path: Path to the feature branch's git worktree.
            task_id: Currently unused by this method.

        Returns:
            dict with success bool and conflicts list.
        """
        wt_repo = git.Repo(worktree_path)
        try:
            wt_repo.git.fetch("origin", "main")
        except git.GitCommandError:
            pass  # fetch may fail in local-only repos; continue with local main

        try:
            wt_repo.git.rebase("main")
            return {"success": True, "conflicts": []}
        except git.GitCommandError:
            # Rebase failed — check for conflicts
            conflicts = self._get_conflict_files(wt_repo)
            if conflicts and self.auto_resolve_conflicts(worktree_path):
                return {"success": True, "conflicts": []}
            # Abort the failed rebase
            # NOTE(review): if auto_resolve_conflicts staged some files before
            # bailing out, the abort below discards that partial work — confirm
            # that is the intended recovery behavior.
            try:
                wt_repo.git.rebase("--abort")
            except git.GitCommandError:
                pass
            return {"success": False, "conflicts": conflicts}
|
|
|
|
def run_linter(self, worktree_path: str) -> dict:
|
|
"""Run ruff linter on the worktree.
|
|
|
|
Returns:
|
|
dict with passed bool, errors list, and warnings list.
|
|
"""
|
|
try:
|
|
result = subprocess.run(
|
|
["ruff", "check", "."],
|
|
cwd=worktree_path,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=120,
|
|
)
|
|
except FileNotFoundError:
|
|
return {"passed": True, "errors": [], "warnings": ["ruff not found, skipping lint"]}
|
|
except subprocess.TimeoutExpired:
|
|
return {"passed": False, "errors": ["Linter timed out"], "warnings": []}
|
|
|
|
errors = []
|
|
warnings = []
|
|
for line in result.stdout.splitlines():
|
|
line = line.strip()
|
|
if not line or line.startswith("Found") or line.startswith("All checks"):
|
|
continue
|
|
# ruff output lines contain error codes like E501, W291, etc.
|
|
if re.search(r"\b[A-Z]\d{3,4}\b", line):
|
|
errors.append(line)
|
|
elif line:
|
|
warnings.append(line)
|
|
|
|
passed = result.returncode == 0
|
|
return {"passed": passed, "errors": errors, "warnings": warnings}
|
|
|
|
def run_tests(self, worktree_path: str) -> dict:
|
|
"""Run pytest in the worktree.
|
|
|
|
Returns:
|
|
dict with passed bool, total/failures/errors counts, and raw output.
|
|
"""
|
|
try:
|
|
result = subprocess.run(
|
|
["python", "-m", "pytest", "-v", "--tb=short"],
|
|
cwd=worktree_path,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=300,
|
|
)
|
|
except FileNotFoundError:
|
|
return {"passed": False, "total": 0, "failures": 0, "errors": 1,
|
|
"output": "pytest not found"}
|
|
except subprocess.TimeoutExpired:
|
|
return {"passed": False, "total": 0, "failures": 0, "errors": 1,
|
|
"output": "Test execution timed out"}
|
|
|
|
output = result.stdout + result.stderr
|
|
parsed = self.parse_test_results(output)
|
|
parsed["output"] = output
|
|
return parsed
|
|
|
|
async def code_review(self, diff: str, task: dict = None) -> dict:
|
|
"""Review a diff using Claude for quality and security issues.
|
|
|
|
Returns:
|
|
dict with approved bool, issues list, and summary string.
|
|
"""
|
|
template = self._load_template("qa_review.txt")
|
|
task_context = ""
|
|
if task:
|
|
task_context = (
|
|
f"Task ID: {task.get('id', 'N/A')}\n"
|
|
f"Title: {task.get('title', 'N/A')}\n"
|
|
f"Description: {task.get('description', 'N/A')}"
|
|
)
|
|
|
|
prompt = template.format(task_context=task_context, diff=diff)
|
|
|
|
response = await self.client.complete(
|
|
prompt=prompt,
|
|
model="claude-sonnet-4-6",
|
|
max_turns=100,
|
|
observability=self.observability,
|
|
agent_name="qa_agent",
|
|
task_id=str(task.get("id", task.get("task_id", "review"))) if task else "review",
|
|
)
|
|
if self.observability:
|
|
self.observability.log_token_usage(
|
|
"qa_agent",
|
|
str(task.get("id", task.get("task_id", "review"))) if task else "review",
|
|
input_tokens=response.input_tokens,
|
|
output_tokens=response.output_tokens,
|
|
model="claude-sonnet-4-6",
|
|
)
|
|
|
|
text = response.text
|
|
return self._parse_review_response(text)
|
|
|
|
def merge_to_main(self, worktree_path: str, task_id: str) -> dict:
|
|
"""Merge the feature branch into main with --no-ff.
|
|
|
|
Returns:
|
|
dict with success bool and commit_sha.
|
|
"""
|
|
branch_name = f"feature/task-{task_id}"
|
|
try:
|
|
self.repo.git.checkout("main")
|
|
self.repo.git.merge("--no-ff", branch_name, m=f"Merge {branch_name}")
|
|
commit_sha = self.repo.head.commit.hexsha
|
|
return {"success": True, "commit_sha": commit_sha}
|
|
except git.GitCommandError as e:
|
|
return {"success": False, "commit_sha": None, "error": str(e)}
|
|
|
|
    def auto_resolve_conflicts(self, worktree_path: str) -> bool:
        """Try to auto-resolve simple merge conflicts.

        For every unmerged file, keeps the content between ``=======`` and
        ``>>>>>>>`` of each conflict hunk, writes the file back, stages it,
        then runs ``git rebase --continue``. Bails out (returns False) on the
        first file without recognizable conflict markers or that cannot be
        processed.

        Returns True if all conflicts were resolved.
        """
        wt_repo = git.Repo(worktree_path)
        unmerged = wt_repo.index.unmerged_blobs()
        if not unmerged:
            return True  # nothing to resolve

        for path in unmerged:
            file_path = os.path.join(worktree_path, path)
            if not os.path.exists(file_path):
                # File missing on disk (e.g. deleted on one side) — skip it.
                continue
            try:
                with open(file_path) as f:
                    content = f.read()
                # Accept "theirs" (incoming) for simple conflicts
                # NOTE(review): during a rebase the half below ``=======`` is
                # the feature branch's own change, not main's — confirm that
                # keeping it is the intended policy here.
                if "<<<<<<< " in content and "=======" in content and ">>>>>>> " in content:
                    resolved = re.sub(
                        r"<<<<<<< [^\n]*\n.*?=======\n(.*?)>>>>>>> [^\n]*\n",
                        r"\1",
                        content,
                        flags=re.DOTALL,
                    )
                    with open(file_path, "w") as f:
                        f.write(resolved)
                    wt_repo.index.add([path])
                else:
                    return False
            except Exception:
                # Any read/write/stage failure means we cannot safely continue.
                return False

        try:
            wt_repo.git.rebase("--continue")
            return True
        except git.GitCommandError:
            return False
|
|
|
|
def parse_test_results(self, output: str) -> dict:
|
|
"""Parse pytest output into structured results.
|
|
|
|
Returns:
|
|
dict with passed bool, total int, failures int, errors int.
|
|
"""
|
|
# Match pytest summary line like "5 passed, 2 failed, 1 error"
|
|
passed_count = 0
|
|
failed_count = 0
|
|
error_count = 0
|
|
|
|
# Look for the summary line
|
|
summary_match = re.search(
|
|
r"=+\s*(.*?)\s*=+\s*$",
|
|
output,
|
|
re.MULTILINE,
|
|
)
|
|
if summary_match:
|
|
summary_line = summary_match.group(1)
|
|
p = re.search(r"(\d+)\s+passed", summary_line)
|
|
f = re.search(r"(\d+)\s+failed", summary_line)
|
|
e = re.search(r"(\d+)\s+error", summary_line)
|
|
if p:
|
|
passed_count = int(p.group(1))
|
|
if f:
|
|
failed_count = int(f.group(1))
|
|
if e:
|
|
error_count = int(e.group(1))
|
|
|
|
total = passed_count + failed_count + error_count
|
|
all_passed = failed_count == 0 and error_count == 0 and total > 0
|
|
|
|
return {
|
|
"passed": all_passed,
|
|
"total": total,
|
|
"failures": failed_count,
|
|
"errors": error_count,
|
|
}
|
|
|
|
def get_retry_count(self, task_id: str) -> int:
|
|
"""Return QA retry count for a task."""
|
|
return self._retry_counts.get(task_id, 0)
|
|
|
|
def _increment_retry(self, task_id: str):
|
|
"""Increment the retry counter for a task."""
|
|
self._retry_counts[task_id] = self._retry_counts.get(task_id, 0) + 1
|
|
|
|
def _load_template(self, template_name: str) -> str:
|
|
"""Load a prompt template file from app_factory/prompts/."""
|
|
path = self._prompts_dir / template_name
|
|
return path.read_text()
|
|
|
|
def _get_conflict_files(self, repo: git.Repo) -> list[str]:
|
|
"""Get list of conflicting files from a repo."""
|
|
try:
|
|
status_output = repo.git.status("--porcelain")
|
|
conflicts = []
|
|
for line in status_output.splitlines():
|
|
if line.startswith("UU ") or line.startswith("AA "):
|
|
conflicts.append(line[3:].strip())
|
|
return conflicts
|
|
except git.GitCommandError:
|
|
return []
|
|
|
|
def _parse_review_response(self, text: str) -> dict:
|
|
"""Parse Claude's review response into structured data."""
|
|
approved = False
|
|
issues = []
|
|
summary = ""
|
|
|
|
for line in text.splitlines():
|
|
line = line.strip()
|
|
if line.upper().startswith("APPROVED:"):
|
|
value = line.split(":", 1)[1].strip().lower()
|
|
approved = value in ("true", "yes")
|
|
elif line.startswith("- ["):
|
|
# Parse issue lines like "- [severity: critical] description"
|
|
issue_match = re.match(
|
|
r"-\s*\[severity:\s*(critical|warning|info)\]\s*(.*)",
|
|
line,
|
|
re.IGNORECASE,
|
|
)
|
|
if issue_match:
|
|
issues.append({
|
|
"severity": issue_match.group(1).lower(),
|
|
"description": issue_match.group(2).strip(),
|
|
})
|
|
elif line.upper().startswith("SUMMARY:"):
|
|
summary = line.split(":", 1)[1].strip()
|
|
|
|
return {"approved": approved, "issues": issues, "summary": summary}
|