Merge remote-tracking branch 'giteahttps/codex/fix-claude-tool-casing-allowlist' into main

2026-02-24 12:53:53 -05:00
parent 691591d279 422e8fe5a5
commit 45374a033b
4 changed files with 120 additions and 1 deletions
--- a/README.md
+++ b/README.md
@@ -267,6 +267,7 @@ jq -c 'select(.severity=="critical")' .ai_ops/events/runtime-events.ndjson
 - Every actor execution input now includes `security` helpers (`rulesEngine`, `createCommandExecutor(...)`) so executors can enforce shell/tool policy at the execution boundary.
 - Every actor execution input now includes `mcp` helpers (`resolvedConfig`, `resolveConfig(...)`, `filterToolsForProvider(...)`, `createClaudeCanUseTool()`) so provider adapters are filtered against `executionContext.allowedTools` before SDK calls.
 - For Claude-based executors, pass `input.mcp.filterToolsForProvider(...)` and `input.mcp.createClaudeCanUseTool()` into the SDK call path so unauthorized tools are never exposed and runtime bypass attempts trigger security violations.
+- Claude `canUseTool` permission checks normalize provider casing (`Bash` vs `bash`) before enforcing persona allowlists.
 - Pipeline behavior on `SecurityViolationError` is configurable:
  - `hard_abort` (default)
  - `validation_fail` (retry-unrolled remediation)
--- a/docs/security-middleware.md
+++ b/docs/security-middleware.md
@@ -40,6 +40,7 @@ This middleware provides a first-pass hardening layer for agent-executed shell c
 - `registry`: resolved runtime `McpRegistry`
 - `resolveConfig(...)`: centralized MCP config resolution with persona tool-clearance applied
 - `createClaudeCanUseTool()`: helper for Claude SDK `canUseTool` callback so each tool invocation is allowlist/banlist-enforced before execution
+  - Tool matching is case-insensitive at invocation time to handle provider-emitted names like `Bash` versus allowlist entries like `bash`.

 ## Known limits and TODOs

--- a/src/agents/pipeline.ts
+++ b/src/agents/pipeline.ts
@@ -489,6 +489,38 @@ function toToolNameCandidates(toolName: string): string[] {
  return dedupeStrings(candidates);
 }

+function buildCaseInsensitiveToolLookup(tools: readonly string[]): Map<string, string> { // Builds a map from each trimmed, lower-cased tool name to the original (unmodified) entry in `tools`.
+  const lookup = new Map<string, string>();
+  for (const tool of tools) {
+    const normalized = tool.trim().toLowerCase(); // map key; the value stored below keeps the entry exactly as given (untrimmed, original casing)
+    if (!normalized || lookup.has(normalized)) { // skip entries that are blank after trimming; for duplicate keys the first entry wins
+      continue;
+    }
+    lookup.set(normalized, tool);
+  }
+  return lookup;
+}
+
+function resolveAllowedToolMatch(input: { // Returns the allowlist entry matched by one of `candidates` (exact match first, then case-insensitive), or undefined when nothing matches.
+  candidates: readonly string[]; // tool-name spellings to try, in order
+  allowset: ReadonlySet<string>; // checked first with exact, case-sensitive membership
+  caseInsensitiveLookup: ReadonlyMap<string, string>; // queried with lower-cased candidates; the caller visible in this diff passes the map from buildCaseInsensitiveToolLookup
+}): string | undefined {
+  const direct = input.candidates.find((candidate) => input.allowset.has(candidate)); // any exact match takes priority over every case-insensitive match
+  if (direct) { // NOTE(review): truthiness check, so an empty-string match would fall through to the loop below
+    return direct;
+  }
+
+  for (const candidate of input.candidates) {
+    const match = input.caseInsensitiveLookup.get(candidate.toLowerCase()); // NOTE(review): candidate is lower-cased but not trimmed here
+    if (match) {
+      return match; // returns the value stored in the map, not the candidate's own spelling
+    }
+  }
+
+  return undefined;
+}
+
 function defaultEventPayloadForStatus(status: ActorResultStatus): DomainEventPayload {
  if (status === "success") {
    return {
@@ -1299,6 +1331,7 @@ export class PipelineExecutor {
    attempt: number;
  }): ActorToolPermissionHandler {
    const allowset = new Set(input.allowedTools);
+    const caseInsensitiveAllowLookup = buildCaseInsensitiveToolLookup(input.allowedTools);
    const rulesEngine = this.securityContext?.rulesEngine;
    const toolPolicy = toAllowedToolPolicy(input.allowedTools);
    const toolAuditContext = {
@@ -1319,7 +1352,11 @@ export class PipelineExecutor {
      }

      const candidates = toToolNameCandidates(toolName);
-      const allowMatch = candidates.find((candidate) => allowset.has(candidate));
+      const allowMatch = resolveAllowedToolMatch({
+        candidates,
+        allowset,
+        caseInsensitiveLookup: caseInsensitiveAllowLookup,
+      });
      if (!allowMatch) {
        rulesEngine?.assertToolInvocationAllowed({
          tool: candidates[0] ?? toolName,
--- a/tests/orchestration-engine.test.ts
+++ b/tests/orchestration-engine.test.ts
@@ -940,6 +940,86 @@ test("propagates abort signal into actor execution and stops the run", async ()
  assert.equal(observedAbort, true);
 });

+test("createClaudeCanUseTool accepts tool casing differences from providers", async () => { // Checks that a persona allowlist entry "bash" permits a canUseTool call made with the name "Bash".
+  const workspaceRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-workspace-"));
+  const stateRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-session-state-"));
+  const projectContextPath = resolve(stateRoot, "project-context.json");

+  const manifest = {
+    schemaVersion: "1",
+    topologies: ["sequential"],
+    personas: [
+      {
+        id: "coder",
+        displayName: "Coder",
+        systemPromptTemplate: "Coder",
+        toolClearance: {
+          allowlist: ["bash"], // lower-case entry; the executor below asks for "Bash"
+          banlist: [],
+        },
+      },
+    ],
+    relationships: [],
+    topologyConstraints: {
+      maxDepth: 2,
+      maxRetries: 0,
+    },
+    pipeline: {
+      entryNodeId: "case-node",
+      nodes: [
+        {
+          id: "case-node",
+          actorId: "case_actor", // must match the key registered under actorExecutors below
+          personaId: "coder",
+        },
+      ],
+      edges: [],
+    },
+  } as const;

+  const engine = new SchemaDrivenExecutionEngine({
+    manifest,
+    settings: {
+      workspaceRoot,
+      stateRoot,
+      projectContextPath,
+      maxChildren: 1,
+      maxDepth: 2,
+      maxRetries: 0,
+      runtimeContext: {},
+    },
+    actorExecutors: {
+      case_actor: async (input) => {
+        const canUseTool = input.mcp.createClaudeCanUseTool();
+        const allow = await canUseTool("Bash", {}, { // name differs from the allowlist entry only by letter case
+          signal: new AbortController().signal,
+          toolUseID: "allow-bash",
+        });
+        assert.deepEqual(allow, { // expects an allow decision carrying the same toolUseID that was passed in
+          behavior: "allow",
+          toolUseID: "allow-bash",
+        });

+        return {
+          status: "success",
+          payload: {
+            ok: true,
+          },
+        };
+      },
+    },
+  });

+  const result = await engine.runSession({
+    sessionId: "session-claude-tool-casing",
+    initialPayload: {
+      task: "verify tool casing",
+    },
+  });

+  assert.equal(result.status, "success");
+});
+
 test("hard-aborts pipeline on security violations by default", async () => {
  const workspaceRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-workspace-"));
  const stateRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-session-state-"));