Enforce actor-level MCP policy wiring and Claude tool gates

2026-02-23 17:28:55 -05:00
parent 20e944f7d4
commit 3ca9bd3db8
5 changed files with 828 additions and 111 deletions
--- a/tests/orchestration-engine.test.ts
+++ b/tests/orchestration-engine.test.ts
@@ -5,6 +5,8 @@ import { tmpdir } from "node:os";
 import { resolve } from "node:path";
 import { SchemaDrivenExecutionEngine } from "../src/agents/orchestration.js";
 import type { ActorExecutionResult } from "../src/agents/pipeline.js";
+import { loadConfig } from "../src/config.js";
+import { createDefaultMcpRegistry, createMcpHandlerShell } from "../src/mcp.js";
 import { SecurityViolationError } from "../src/security/index.js";

 function createManifest(): unknown {
@@ -252,6 +254,163 @@ test("runs DAG pipeline with state-dependent routing and retry behavior", async
  assert.deepEqual(engine.planChildPersonas({ parentPersonaId: "task", depth: 1 }), ["coder"]);
 });

+test("injects mcp registry/config helpers and enforces Claude tool gate in actor executor", async () => { // Checks what an actor executor receives on input.mcp: the registry, per-provider config, and the Claude tool gate.
+  const workspaceRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-workspace-"));
+  const stateRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-session-state-"));
+  const projectContextPath = resolve(stateRoot, "project-context.json");
+  const mcpConfigPath = resolve(workspaceRoot, "mcp.config.json");
+
+  await writeFile( // write the MCP server config that loadConfig is pointed at below
+    mcpConfigPath,
+    JSON.stringify(
+      {
+        servers: {
+          "task-master-tools": {
+            handler: "claude-task-master",
+            type: "stdio",
+            command: "node",
+            args: ["task-master-mcp.js"],
+            enabled_tools: ["read_file", "write_file", "search"], // "search" is enabled here but is absent from the persona allowlist below
+          },
+        },
+      },
+      null,
+      2,
+    ),
+    "utf8",
+  );
+
+  const config = loadConfig({ // current environment plus an override naming the config file written above
+    ...process.env,
+    MCP_CONFIG_PATH: mcpConfigPath,
+  });
+
+  const customRegistry = createDefaultMcpRegistry(); // passed to the engine; the executor asserts it receives this same registry
+  customRegistry.register(
+    createMcpHandlerShell({
+      id: "custom-task-mcp-handler",
+      description: "custom task handler",
+      matches: () => false,
+    }),
+  );
+
+  const manifest = { // one persona, one node, no edges
+    schemaVersion: "1" as const,
+    topologies: ["sequential"],
+    personas: [
+      {
+        id: "task",
+        displayName: "Task",
+        systemPromptTemplate: "Task executor",
+        toolClearance: { // the assertions inside task_actor are written against these two lists
+          allowlist: ["read_file", "write_file"],
+          banlist: ["rm"],
+        },
+      },
+    ],
+    relationships: [],
+    topologyConstraints: {
+      maxDepth: 2,
+      maxRetries: 0,
+    },
+    pipeline: {
+      entryNodeId: "task-node",
+      nodes: [
+        {
+          id: "task-node",
+          actorId: "task_actor",
+          personaId: "task",
+        },
+      ],
+      edges: [],
+    },
+  };
+
+  const engine = new SchemaDrivenExecutionEngine({
+    manifest,
+    config,
+    mcpRegistry: customRegistry,
+    settings: {
+      workspaceRoot,
+      stateRoot,
+      projectContextPath,
+      maxChildren: 1,
+      maxDepth: 2,
+      maxRetries: 0,
+    },
+    actorExecutors: {
+      task_actor: async (input) => { // every MCP assertion of this test runs inside this executor
+        assert.equal(input.mcp.registry, customRegistry); // expects the registry supplied to the engine constructor
+
+        const codexConfig = input.mcp.resolveConfig({
+          providerHint: "codex",
+        });
+        const codexServer = (codexConfig.codexConfig?.mcp_servers as Record<string, Record<string, unknown>> | undefined)?.[
+          "task-master-tools"
+        ];
+        assert.ok(codexServer);
+        assert.deepEqual(codexServer.enabled_tools, ["read_file", "write_file"]); // equals the persona allowlist, not the three tools listed in mcp.config.json
+        assert.deepEqual(codexServer.disabled_tools, ["rm"]); // equals the persona banlist
+
+        const claudeConfig = input.mcp.resolveConfig({
+          providerHint: "claude",
+        });
+        assert.ok(claudeConfig.claudeMcpServers?.["task-master-tools"]); // only presence of the server entry is checked for the claude hint
+
+        const canUseTool = input.mcp.createClaudeCanUseTool(); // gate callback, exercised with three tool names below
+        const allow = await canUseTool(
+          "mcp__claude-task-master__read_file", // read_file is on the allowlist
+          {},
+          {
+            signal: new AbortController().signal,
+            toolUseID: "allow-1",
+          },
+        );
+        assert.deepEqual(allow, { // the allow result is expected to carry the toolUseID that was passed in
+          behavior: "allow",
+          toolUseID: "allow-1",
+        });
+
+        const denyBlocked = await canUseTool(
+          "mcp__claude-task-master__rm", // rm is on the banlist
+          {},
+          {
+            signal: new AbortController().signal,
+            toolUseID: "deny-1",
+          },
+        );
+        assert.equal(denyBlocked.behavior, "deny");
+
+        const denyMissingAllowlist = await canUseTool(
+          "mcp__claude-task-master__search", // search is in enabled_tools but not on the allowlist
+          {},
+          {
+            signal: new AbortController().signal,
+            toolUseID: "deny-2",
+          },
+        );
+        assert.equal(denyMissingAllowlist.behavior, "deny");
+
+        return {
+          status: "success",
+          payload: {
+            ok: true,
+          },
+        };
+      },
+    },
+  });
+
+  const result = await engine.runSession({
+    sessionId: "session-mcp-gate-1",
+    initialPayload: {
+      task: "verify mcp gate",
+    },
+  });
+
+  assert.equal(result.status, "success"); // NOTE(review): presumably an assertion thrown inside task_actor shows up as a non-success status here — confirm
+});
+
 test("runs parallel topology blocks concurrently and routes via domain-event edges", async () => {
  const workspaceRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-workspace-"));
  const stateRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-session-state-"));
@@ -916,3 +1075,105 @@ test("can map security violations to validation_fail for retry-unrolled remediat
    ["secure-node:validation_fail:1", "secure-node:success:2"],
  );
 });
+
+test("runtime event side-channel logs session and node lifecycle without changing pipeline behavior", async () => { // Runs a one-node session with an event log path configured, then inspects the NDJSON file it leaves behind.
+  const workspaceRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-runtime-event-workspace-"));
+  const stateRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-runtime-event-state-"));
+  const projectContextPath = resolve(stateRoot, "project-context.json");
+  const runtimeEventLogRelativePath = ".ai_ops/events/test-runtime-events.ndjson"; // relative path given to loadConfig below
+  const runtimeEventLogPath = resolve(workspaceRoot, runtimeEventLogRelativePath); // where the test later reads the log: the relative path resolved against workspaceRoot
+
+  const manifest = { // one persona, one node, no edges
+    schemaVersion: "1",
+    topologies: ["sequential"],
+    personas: [
+      {
+        id: "runner",
+        displayName: "Runner",
+        systemPromptTemplate: "Runner",
+        toolClearance: {
+          allowlist: ["read_file"],
+          banlist: [],
+        },
+      },
+    ],
+    relationships: [],
+    topologyConstraints: {
+      maxDepth: 2,
+      maxRetries: 0,
+    },
+    pipeline: {
+      entryNodeId: "node-1",
+      nodes: [
+        {
+          id: "node-1",
+          actorId: "runner_actor",
+          personaId: "runner",
+        },
+      ],
+      edges: [],
+    },
+  } as const;
+
+  const config = loadConfig({ // only this one variable is supplied; process.env is not spread in here
+    AGENT_RUNTIME_EVENT_LOG_PATH: runtimeEventLogRelativePath,
+  });
+
+  const engine = new SchemaDrivenExecutionEngine({
+    manifest,
+    config,
+    settings: {
+      workspaceRoot,
+      stateRoot,
+      projectContextPath,
+      maxDepth: 2,
+      maxRetries: 0,
+      maxChildren: 1,
+      runtimeContext: {},
+    },
+    actorExecutors: {
+      runner_actor: async () => ({
+        status: "success",
+        payload: {
+          complete: true,
+          usage: { // snake_case here; the event assertions at the end expect tokenInput/tokenOutput/toolCalls/durationMs with these values
+            input_tokens: 120,
+            output_tokens: 80,
+            tool_calls: 2,
+            duration_ms: 450,
+          },
+        },
+      }),
+    },
+  });
+
+  const result = await engine.runSession({
+    sessionId: "session-runtime-events",
+    initialPayload: {
+      task: "Emit runtime events",
+    },
+  });
+
+  assert.equal(result.status, "success"); // the session outcome is checked before the log file is read
+
+  const lines = (await readFile(runtimeEventLogPath, "utf8")) // split the log into its non-empty lines
+    .trim()
+    .split("\n")
+    .filter((line) => line.length > 0);
+  assert.ok(lines.length >= 4); // at least one line for each of the four event types asserted below
+
+  const events = lines.map((line) => JSON.parse(line) as Record<string, unknown>); // each line is parsed as its own JSON document
+  const eventTypes = new Set(events.map((event) => String(event.type)));
+  assert.ok(eventTypes.has("session.started"));
+  assert.ok(eventTypes.has("node.attempt.completed"));
+  assert.ok(eventTypes.has("domain.validation_passed"));
+  assert.ok(eventTypes.has("session.completed"));
+
+  const nodeAttemptEvent = events.find((event) => event.type === "node.attempt.completed"); // first event of this type
+  assert.ok(nodeAttemptEvent);
+  const usage = nodeAttemptEvent.usage as Record<string, unknown>; // unchecked cast; the four equality assertions below are the only validation of its shape
+  assert.equal(usage.tokenInput, 120);
+  assert.equal(usage.tokenOutput, 80);
+  assert.equal(usage.toolCalls, 2);
+  assert.equal(usage.durationMs, 450);
+});