Merge origin/main with local UI refactor integration

2026-02-25 00:38:19 -05:00
parent 659f3edcee 90725eaae8
commit 35e3f81327
42 changed files with 4886 additions and 188 deletions
--- a/tests/claude-observability.test.ts
+++ b/tests/claude-observability.test.ts
@@ -0,0 +1,296 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { mkdtemp, readFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import type { SDKMessage } from "@anthropic-ai/claude-agent-sdk";
+import { ClaudeObservabilityLogger, summarizeClaudeMessage } from "../src/ui/claude-observability.js";
+
+test("summarizeClaudeMessage returns compact result metadata in summary mode", () => {
+  // The literal is cast through `unknown` so it can stand in for an SDKMessage.
+  const resultMessage = {
+    type: "result",
+    subtype: "success",
+    stop_reason: "end_turn",
+    num_turns: 1,
+    total_cost_usd: 0.0012,
+    usage: { input_tokens: 120, output_tokens: 40 },
+    result: "{\"status\":\"success\"}",
+    duration_ms: 40,
+    duration_api_ms: 32,
+    is_error: false,
+    modelUsage: {},
+    permission_denials: [],
+    uuid: "uuid-1",
+    session_id: "sdk-session-1",
+  } as unknown as SDKMessage;
+
+  const compact = summarizeClaudeMessage(resultMessage, "summary");
+  const reportedUsage = compact.data?.usage as Record<string, unknown> | undefined;
+
+  // Envelope fields come first.
+  assert.equal(compact.messageType, "result");
+  assert.equal(compact.messageSubtype, "success");
+  assert.equal(compact.sdkSessionId, "sdk-session-1");
+  assert.equal(compact.summary, "Claude query result success.");
+  // Then the metadata carried under `data`.
+  assert.equal(compact.data?.numTurns, 1);
+  assert.equal(reportedUsage?.input_tokens, 120);
+});
+
+test("summarizeClaudeMessage redacts sensitive fields in full mode", () => {
+  // Secrets sit at the top level and one level down so both depths are checked.
+  const initMessage = {
+    type: "system",
+    subtype: "init",
+    session_id: "sdk-session-2",
+    uuid: "uuid-2",
+    apiKey: "top-secret",
+    nested: { authToken: "really-secret", ok: true },
+  } as unknown as SDKMessage;
+
+  const full = summarizeClaudeMessage(initMessage, "full");
+  const echoed = full.data?.message as Record<string, unknown> | undefined;
+  const echoedNested = echoed?.nested as Record<string, unknown> | undefined;
+
+  assert.equal(full.messageType, "system");
+  assert.equal(full.messageSubtype, "init");
+
+  // Credential-like keys are masked; the unrelated sibling value is left as-is.
+  assert.equal(echoed?.apiKey, "[redacted]");
+  assert.equal(echoedNested?.authToken, "[redacted]");
+  assert.equal(echoedNested?.ok, true);
+});
+
+test("ClaudeObservabilityLogger samples tool_progress messages for stdout", () => {
+  // Stdout is captured and the clock is driven by hand; both globals are restored in `finally`.
+  const captured: string[] = [];
+  const realLog = console.log;
+  const realNow = Date.now;
+  let clock = 1000;
+
+  console.log = (line?: unknown) => {
+    captured.push(String(line ?? ""));
+  };
+  Date.now = () => clock;
+
+  try {
+    const logger = new ClaudeObservabilityLogger({
+      workspaceRoot: process.cwd(),
+      config: {
+        mode: "stdout",
+        verbosity: "summary",
+        logPath: ".ai_ops/events/claude-trace.ndjson",
+        includePartialMessages: false,
+        debug: false,
+      },
+    });
+
+    const context = {
+      sessionId: "session-a",
+      nodeId: "node-a",
+      attempt: 1,
+      depth: 0,
+    };
+
+    // A new message object is built for every record call.
+    const progressMessage = (): SDKMessage =>
+      ({
+        type: "tool_progress",
+        tool_name: "Bash",
+        tool_use_id: "tool-1",
+        parent_tool_use_id: null,
+        elapsed_time_seconds: 1,
+        uuid: "uuid-tool",
+        session_id: "sdk-session-tool",
+      }) as unknown as SDKMessage;
+
+    // Move the fake clock forward by `advanceMs`, then record one tool_progress message.
+    const recordAfter = (advanceMs: number): void => {
+      clock += advanceMs;
+      logger.recordMessage({
+        context,
+        message: progressMessage(),
+      });
+    };
+
+    // Records land at t=1000, t=1300 and t=2500 on the fake clock.
+    recordAfter(0);
+    recordAfter(300);
+    recordAfter(1200);
+
+    // Two lines reach stdout, and the second reports one suppressed record.
+    assert.equal(captured.length, 2);
+    assert.match(captured[0] ?? "", /^\[claude-trace\] /);
+    assert.match(captured[1] ?? "", /"suppressedSinceLastEmit":1/);
+  } finally {
+    console.log = realLog;
+    Date.now = realNow;
+  }
+});
+
+test("ClaudeObservabilityLogger keeps assistant/user message records in file output", async () => {
+  // File mode, rooted at a new temp directory; `logPath` is joined onto that root when read back.
+  const workspace = await mkdtemp(join(tmpdir(), "claude-obsv-test-"));
+  const logPath = ".ai_ops/events/claude-trace.ndjson";
+  const logger = new ClaudeObservabilityLogger({
+    workspaceRoot: workspace,
+    config: {
+      mode: "file",
+      verbosity: "summary",
+      logPath,
+      includePartialMessages: false,
+      debug: false,
+    },
+  });
+
+  const context = {
+    sessionId: "session-file",
+    nodeId: "node-file",
+    attempt: 1,
+    depth: 0,
+  };
+
+  // Assistant and user envelopes differ only in `type` and `uuid`.
+  const conversational = (type: "assistant" | "user", uuid: string): SDKMessage =>
+    ({
+      type,
+      uuid,
+      session_id: "sdk-file-1",
+      parent_tool_use_id: null,
+      message: {} as never,
+    }) as unknown as SDKMessage;
+
+  const resultMessage = {
+    type: "result",
+    subtype: "success",
+    stop_reason: "end_turn",
+    num_turns: 1,
+    total_cost_usd: 0.0012,
+    usage: {
+      input_tokens: 100,
+      output_tokens: 20,
+    },
+    result: "{}",
+    duration_ms: 10,
+    duration_api_ms: 9,
+    is_error: false,
+    modelUsage: {},
+    permission_denials: [],
+    uuid: "result-1",
+    session_id: "sdk-file-1",
+  } as unknown as SDKMessage;
+
+  // One query lifecycle: start, assistant message, user message, result, completion.
+  logger.recordQueryStarted({ context });
+  logger.recordMessage({
+    context,
+    message: conversational("assistant", "assistant-1"),
+  });
+  logger.recordMessage({
+    context,
+    message: conversational("user", "user-1"),
+  });
+  logger.recordMessage({
+    context,
+    message: resultMessage,
+  });
+  logger.recordQueryCompleted({ context });
+
+  // Close the logger before reading the file back (presumably flushes buffered writes).
+  await logger.close();
+
+  const raw = await readFile(join(workspace, logPath), "utf8");
+  const seenTypes = raw
+    .split(/\r?\n/)
+    .filter((line) => line.trim().length > 0)
+    .map((line) => JSON.parse(line) as Record<string, unknown>)
+    .map((record) => record.sdkMessageType)
+    .filter((value) => typeof value === "string");
+
+  // Each of the three recorded message kinds must appear in the file.
+  for (const expected of ["assistant", "user", "result"]) {
+    assert.equal(seenTypes.includes(expected), true);
+  }
+});
+
+test("summarizeClaudeMessage maps task_notification system subtype", () => {
+  // System message whose subtype is `task_notification` and whose status is "completed".
+  const notification = {
+    type: "system",
+    subtype: "task_notification",
+    task_id: "task-1",
+    status: "completed",
+    output_file: "/tmp/out.txt",
+    summary: "Task complete",
+    uuid: "uuid-task",
+    session_id: "sdk-session-task",
+  } as unknown as SDKMessage;
+
+  const { messageType, messageSubtype, summary, data } = summarizeClaudeMessage(notification, "summary");
+  assert.equal(messageType, "system");
+  assert.equal(messageSubtype, "task_notification");
+  assert.equal(summary, "Task notification: completed.");
+  assert.equal(data?.taskId, "task-1");
+});
+
+test("ClaudeObservabilityLogger honors includePartialMessages for stream events", () => {
+  // Stdout is captured for the duration of the test; console.log is restored in `finally`.
+  // Each logger's output is asserted separately so the two cases cannot mask each other.
+  const lines: string[] = [];
+  const originalLog = console.log;
+  console.log = (line?: unknown) => {
+    lines.push(String(line ?? ""));
+  };
+
+  try {
+    const context = {
+      sessionId: "session-stream",
+      nodeId: "node-stream",
+      attempt: 1,
+      depth: 0,
+    };
+    const streamMessage = {
+      type: "stream_event",
+      event: {
+        type: "content_block_delta",
+      },
+      parent_tool_use_id: null,
+      uuid: "stream-1",
+      session_id: "sdk-session-stream",
+    } as unknown as SDKMessage;
+
+    // Builds a stdout logger; only the includePartialMessages flag varies between calls.
+    const makeLogger = (includePartialMessages: boolean): ClaudeObservabilityLogger =>
+      new ClaudeObservabilityLogger({
+        workspaceRoot: process.cwd(),
+        config: {
+          mode: "stdout",
+          verbosity: "summary",
+          logPath: ".ai_ops/events/claude-trace.ndjson",
+          includePartialMessages,
+          debug: false,
+        },
+      });
+
+    // Flag off: the stream event must produce no output. Checking here means a line
+    // emitted by this logger can never be mistaken for the one expected further down.
+    const withoutPartial = makeLogger(false);
+    withoutPartial.recordMessage({
+      context,
+      message: streamMessage,
+    });
+    assert.equal(lines.length, 0);
+
+    // Flag on: exactly one stream_event line is written.
+    const withPartial = makeLogger(true);
+    withPartial.recordMessage({
+      context,
+      message: streamMessage,
+    });
+    assert.equal(lines.length, 1);
+    assert.match(lines[0] ?? "", /\"sdkMessageType\":\"stream_event\"/);
+  } finally {
+    console.log = originalLog;
+  }
+});
--- a/tests/claude-trace-store.test.ts
+++ b/tests/claude-trace-store.test.ts
@@ -0,0 +1,42 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { mkdtemp, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { filterClaudeTraceEvents, readClaudeTraceEvents } from "../src/ui/claude-trace-store.js";
+
+test("readClaudeTraceEvents parses and sorts ndjson records", async () => {
+  // Two valid records written newest-first, plus two lines the assertions expect to be dropped.
+  const fixtureLines = [
+    '{"timestamp":"2026-02-24T17:27:05.000Z","message":"later","sessionId":"s1"}',
+    'not-json',
+    '{"timestamp":"2026-02-24T17:26:00.000Z","message":"earlier","sessionId":"s1"}',
+    '{"message":"missing timestamp"}',
+  ];
+  const tempDir = await mkdtemp(join(tmpdir(), "claude-trace-store-"));
+  const tracePath = join(tempDir, "claude-trace.ndjson");
+  await writeFile(tracePath, fixtureLines.join("\n"), "utf8");
+
+  const parsed = await readClaudeTraceEvents(tracePath);
+
+  // Only the two timestamped JSON records remain, oldest first.
+  assert.equal(parsed.length, 2);
+  assert.equal(parsed[0]?.message, "earlier");
+  assert.equal(parsed[1]?.message, "later");
+});
+
+test("filterClaudeTraceEvents filters by session and limit", () => {
+  // Two "s1" events bracket one "s2" event; timestamps ascend a -> b -> c.
+  const events = [
+    { timestamp: "2026-02-24T17:00:00.000Z", message: "a", sessionId: "s1" },
+    { timestamp: "2026-02-24T17:01:00.000Z", message: "b", sessionId: "s2" },
+    { timestamp: "2026-02-24T17:02:00.000Z", message: "c", sessionId: "s1" },
+  ];
+  const criteria = { sessionId: "s1", limit: 1 };
+
+  const kept = filterClaudeTraceEvents(events, criteria);
+
+  // With limit 1 the single surviving "s1" event is "c", the later of the two.
+  assert.equal(kept.length, 1);
+  assert.equal(kept[0]?.message, "c");
+});
--- a/tests/config.test.ts
+++ b/tests/config.test.ts
@@ -12,6 +12,7 @@ test("loads defaults and freezes config", () => {

  assert.equal(config.agentManager.maxConcurrentAgents, 4);
  assert.equal(config.orchestration.maxDepth, 4);
+  assert.equal(config.orchestration.mergeConflictMaxAttempts, 2);
  assert.equal(config.provisioning.portRange.basePort, 36000);
  assert.equal(config.discovery.fileRelativePath, ".agent-context/resources.json");
  assert.equal(config.security.violationHandling, "hard_abort");
@@ -24,6 +25,12 @@ test("loads defaults and freezes config", () => {
    "session.failed",
  ]);
  assert.equal(config.provider.openAiAuthMode, "auto");
+  assert.equal(config.provider.claudeMaxTurns, 2);
+  assert.equal(config.provider.claudeObservability.mode, "off");
+  assert.equal(config.provider.claudeObservability.verbosity, "summary");
+  assert.equal(config.provider.claudeObservability.logPath, ".ai_ops/events/claude-trace.ndjson");
+  assert.equal(config.provider.claudeObservability.includePartialMessages, false);
+  assert.equal(config.provider.claudeObservability.debug, false);
  assert.equal(Object.isFrozen(config), true);
  assert.equal(Object.isFrozen(config.orchestration), true);
 });
@@ -49,6 +56,11 @@ test("validates security violation mode", () => {
  );
 });

+test("loads dangerous_warn_only security violation mode", () => {
+  const { security } = loadConfig({ AGENT_SECURITY_VIOLATION_MODE: "dangerous_warn_only" }); // must load without throwing
+  assert.equal(security.violationHandling, "dangerous_warn_only");
+});
+
 test("validates runtime discord severity mode", () => {
  assert.throws(
    () => loadConfig({ AGENT_RUNTIME_DISCORD_MIN_SEVERITY: "verbose" }),
@@ -56,6 +68,45 @@ test("validates runtime discord severity mode", () => {
  );
 });

+test("validates claude observability mode", () => {
+  // "stream" is rejected: loading must throw with the "must be one of" message.
+  const loadWithBadMode = () => loadConfig({ CLAUDE_OBSERVABILITY_MODE: "stream" });
+
+  assert.throws(loadWithBadMode, /CLAUDE_OBSERVABILITY_MODE must be one of/);
+});
+
+test("validates CLAUDE_MAX_TURNS bounds", () => {
+  // Zero sits below the minimum of 1 named in the expected error message.
+  const loadWithZeroTurns = () => loadConfig({ CLAUDE_MAX_TURNS: "0" });
+
+  assert.throws(loadWithZeroTurns, /CLAUDE_MAX_TURNS must be an integer >= 1/);
+});
+
+test("validates claude observability verbosity", () => {
+  // "verbose" is rejected: loading must throw with the "must be one of" message.
+  const loadWithBadVerbosity = () => loadConfig({ CLAUDE_OBSERVABILITY_VERBOSITY: "verbose" });
+
+  assert.throws(loadWithBadVerbosity, /CLAUDE_OBSERVABILITY_VERBOSITY must be one of/);
+});
+
+test("loads claude observability settings", () => {
+  // Six CLAUDE_OBSERVABILITY_* variables are set; each must land on provider.claudeObservability.
+  const observability = loadConfig({
+    CLAUDE_OBSERVABILITY_MODE: "both",
+    CLAUDE_OBSERVABILITY_VERBOSITY: "full",
+    CLAUDE_OBSERVABILITY_LOG_PATH: ".ai_ops/debug/claude.ndjson",
+    CLAUDE_OBSERVABILITY_INCLUDE_PARTIAL: "true",
+    CLAUDE_OBSERVABILITY_DEBUG: "true",
+    CLAUDE_OBSERVABILITY_DEBUG_LOG_PATH: ".ai_ops/debug/claude-sdk.log",
+  }).provider.claudeObservability;
+  assert.equal(observability.mode, "both");
+  assert.equal(observability.verbosity, "full");
+  assert.equal(observability.logPath, ".ai_ops/debug/claude.ndjson");
+  assert.equal(observability.includePartialMessages, true);
+  assert.equal(observability.debug, true);
+  assert.equal(observability.debugLogPath, ".ai_ops/debug/claude-sdk.log");
+});
+
 test("prefers CLAUDE_CODE_OAUTH_TOKEN over ANTHROPIC_API_KEY", () => {
  const config = loadConfig({
    CLAUDE_CODE_OAUTH_TOKEN: "oauth-token",
@@ -127,3 +178,10 @@ test("validates AGENT_WORKTREE_TARGET_PATH against parent traversal", () => {
    /must not contain "\.\." path segments/,
  );
 });
+
+test("validates AGENT_MERGE_CONFLICT_MAX_ATTEMPTS bounds", () => {
+  // Zero sits below the minimum of 1 named in the expected error message.
+  const loadWithZeroAttempts = () => loadConfig({ AGENT_MERGE_CONFLICT_MAX_ATTEMPTS: "0" });
+
+  assert.throws(loadWithZeroAttempts, /AGENT_MERGE_CONFLICT_MAX_ATTEMPTS must be an integer >= 1/);
+});
--- a/tests/orchestration-engine.test.ts
+++ b/tests/orchestration-engine.test.ts
@@ -380,6 +380,7 @@ test("injects resolved mcp/helpers and enforces Claude tool gate in actor execut
        );
        assert.deepEqual(allow, {
          behavior: "allow",
+          updatedInput: {},
          toolUseID: "allow-1",
        });

@@ -614,6 +615,7 @@ test("runs parallel topology blocks concurrently and routes via domain-event edg
          projectContextPatch: {
            enqueueTasks: [
              {
+                taskId: "task-integrate",
                id: "task-integrate",
                title: "Integrate feature branches",
                status: "pending",
@@ -996,6 +998,7 @@ test("createClaudeCanUseTool accepts tool casing differences from providers", as
        });
        assert.deepEqual(allow, {
          behavior: "allow",
+          updatedInput: {},
          toolUseID: "allow-bash",
        });

@@ -1019,6 +1022,88 @@ test("createClaudeCanUseTool accepts tool casing differences from providers", as
  assert.equal(result.status, "success");
 });

+test("dangerous_warn_only allows tool use outside persona allowlist", async () => {
+  const workspaceRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-workspace-")); // new temp dirs for this test only
+  const stateRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-session-state-"));
+  const projectContextPath = resolve(stateRoot, "project-context.json");
+
+  const manifest = { // single-node sequential pipeline with one persona
+    schemaVersion: "1",
+    topologies: ["sequential"],
+    personas: [
+      {
+        id: "reader",
+        displayName: "Reader",
+        systemPromptTemplate: "Reader",
+        toolClearance: {
+          allowlist: ["read_file"], // "Bash", requested below, is not listed here
+          banlist: [],
+        },
+      },
+    ],
+    relationships: [],
+    topologyConstraints: {
+      maxDepth: 2,
+      maxRetries: 0,
+    },
+    pipeline: {
+      entryNodeId: "warn-node",
+      nodes: [
+        {
+          id: "warn-node",
+          actorId: "warn_actor",
+          personaId: "reader",
+        },
+      ],
+      edges: [],
+    },
+  } as const;
+
+  const engine = new SchemaDrivenExecutionEngine({
+    manifest,
+    settings: {
+      workspaceRoot,
+      stateRoot,
+      projectContextPath,
+      maxChildren: 1,
+      maxDepth: 2,
+      maxRetries: 0,
+      securityViolationHandling: "dangerous_warn_only", // the mode under test
+      runtimeContext: {},
+    },
+    actorExecutors: {
+      warn_actor: async (input) => { // executor for the pipeline's only node; assertions run inside it
+        const canUseTool = input.mcp.createClaudeCanUseTool();
+        const allow = await canUseTool("Bash", {}, { // ask for a tool outside the persona allowlist
+          signal: new AbortController().signal,
+          toolUseID: "allow-bash-warn",
+        });
+        assert.deepEqual(allow, { // expected: allowed, with the same toolUseID and an empty updatedInput
+          behavior: "allow",
+          updatedInput: {},
+          toolUseID: "allow-bash-warn",
+        });
+
+        return {
+          status: "success",
+          payload: {
+            ok: true,
+          },
+        };
+      },
+    },
+  });
+
+  const result = await engine.runSession({
+    sessionId: "session-dangerous-warn-only",
+    initialPayload: {
+      task: "verify warn-only bypass",
+    },
+  });
+
+  assert.equal(result.status, "success"); // the session must finish successfully
+});
+
 test("hard-aborts pipeline on security violations by default", async () => {
  const workspaceRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-workspace-"));
  const stateRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-session-state-"));
--- a/tests/project-context.test.ts
+++ b/tests/project-context.test.ts
@@ -28,6 +28,7 @@ test("project context store reads defaults and applies domain patches", async ()
    },
    enqueueTasks: [
      {
+        taskId: "task-1",
        id: "task-1",
        title: "Build parser",
        status: "pending",
@@ -38,11 +39,13 @@ test("project context store reads defaults and applies domain patches", async ()
  const updated = await store.patchState({
    upsertTasks: [
      {
+        taskId: "task-1",
        id: "task-1",
        title: "Build parser",
        status: "in_progress",
      },
      {
+        taskId: "task-2",
        id: "task-2",
        title: "Add tests",
        status: "pending",
@@ -59,6 +62,35 @@ test("project context store reads defaults and applies domain patches", async ()
  assert.equal(updated.schemaVersion, 1);
 });

+test("project context accepts conflict-aware task statuses", async () => {
+  // The store is backed by a file inside a new temp directory.
+  const tempRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-project-context-conflict-"));
+  const contextFile = resolve(tempRoot, "project-context.json");
+  const store = new FileSystemProjectContextStore({ filePath: contextFile });
+
+  // Upsert one task in each of the two merge-conflict statuses.
+  const { taskQueue } = await store.patchState({
+    upsertTasks: [
+      {
+        taskId: "task-conflict",
+        id: "task-conflict",
+        title: "Resolve merge conflict",
+        status: "conflict",
+      },
+      {
+        taskId: "task-resolving",
+        id: "task-resolving",
+        title: "Retry merge",
+        status: "resolving_conflict",
+      },
+    ],
+  });
+
+  // Both tasks come back in the queue, in the order given, with their status intact.
+  const queued = taskQueue.map((task) => `${task.taskId}:${task.status}`);
+  assert.deepEqual(queued, ["task-conflict:conflict", "task-resolving:resolving_conflict"]);
+});
+
 test("project context parser merges missing root keys with defaults", async () => {
  const root = await mkdtemp(resolve(tmpdir(), "ai-ops-project-context-"));
  const filePath = resolve(root, "project-context.json");
@@ -70,6 +102,7 @@ test("project context parser merges missing root keys with defaults", async () =
      {
        taskQueue: [
          {
+            taskId: "task-1",
            id: "task-1",
            title: "Migrate",
            status: "pending",
--- a/tests/provider-adapters.test.ts
+++ b/tests/provider-adapters.test.ts
@@ -160,6 +160,7 @@ test("runClaudePrompt wires auth env, stream parsing, and output", async () => {
    ANTHROPIC_API_KEY: "legacy-api-key",
    CLAUDE_MODEL: "claude-sonnet-4-6",
    CLAUDE_CODE_PATH: "/usr/local/bin/claude",
+    CLAUDE_MAX_TURNS: "5",
  });

  let closed = false;
@@ -229,6 +230,7 @@ test("runClaudePrompt wires auth env, stream parsing, and output", async () => {
  assert.equal(queryInput?.prompt, "augmented prompt");
  assert.equal(queryInput?.options?.model, "claude-sonnet-4-6");
  assert.equal(queryInput?.options?.pathToClaudeCodeExecutable, "/usr/local/bin/claude");
+  assert.equal(queryInput?.options?.maxTurns, 5);
  assert.equal(queryInput?.options?.cwd, "/tmp/claude-worktree");
  assert.equal(queryInput?.options?.authToken, "oauth-token");
  assert.deepEqual(queryInput?.options?.mcpServers, sessionContext.mcp.claudeMcpServers);
--- a/tests/provider-executor.test.ts
+++ b/tests/provider-executor.test.ts
@@ -1,6 +1,17 @@
 import test from "node:test";
 import assert from "node:assert/strict";
-import { parseActorExecutionResultFromModelOutput } from "../src/agents/provider-executor.js";
+import { mkdtemp } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { resolve } from "node:path";
+import { loadConfig } from "../src/config.js";
+import type { ActorExecutionInput } from "../src/agents/pipeline.js";
+import {
+  buildProviderRuntimeEnv,
+  createProviderRunRuntime,
+  parseActorExecutionResultFromModelOutput,
+  resolveProviderWorkingDirectory,
+  type ProviderRunRuntime,
+} from "../src/agents/provider-executor.js";

 test("parseActorExecutionResultFromModelOutput parses strict JSON payload", () => {
  const parsed = parseActorExecutionResultFromModelOutput({
@@ -103,3 +114,71 @@ test("parseActorExecutionResultFromModelOutput preserves status when optional fi
  assert.equal(parsed.failureKind, undefined);
  assert.equal(parsed.failureCode, undefined);
 });
+
+test("resolveProviderWorkingDirectory reads cwd from actor execution context", () => {
+  const worktreePath = "/tmp/session/tasks/product-intake";
+
+  // Only executionContext.security.worktreePath is populated; the double cast
+  // lets this partial literal stand in for a full ActorExecutionInput.
+  const actorInput = {
+    executionContext: { security: { worktreePath } },
+  } as unknown as ActorExecutionInput;
+
+  const resolvedCwd = resolveProviderWorkingDirectory(actorInput);
+
+  // The resolver must hand back exactly that worktree path.
+  assert.equal(resolvedCwd, worktreePath);
+});
+
+test("buildProviderRuntimeEnv scopes AGENT_WORKTREE_PATH to actor worktree and filters undefined auth", () => {
+  const worktreePath = "/tmp/session/tasks/product-intake";
+
+  // Hand-built runtime: config carries only the OAuth token, and the
+  // observability logger is an empty placeholder cast to the expected type.
+  const runtime = {
+    provider: "claude",
+    config: loadConfig({ CLAUDE_CODE_OAUTH_TOKEN: "oauth-token" }),
+    sharedEnv: {
+      PATH: "/usr/bin",
+      KEEP_ME: "1",
+    },
+    claudeObservability: {} as ProviderRunRuntime["claudeObservability"],
+    close: async () => {},
+  } satisfies ProviderRunRuntime;
+
+  // Partial actor input: only the security worktree path is filled in.
+  const actorInput = {
+    executionContext: { security: { worktreePath } },
+  } as unknown as ActorExecutionInput;
+
+  const env = buildProviderRuntimeEnv({
+    runtime,
+    actorInput,
+    includeClaudeAuth: true,
+  });
+
+  // Worktree path and OAuth token are present, no API key entry exists, shared entries survive.
+  assert.equal(env.AGENT_WORKTREE_PATH, worktreePath);
+  assert.equal(env.CLAUDE_CODE_OAUTH_TOKEN, "oauth-token");
+  assert.equal("ANTHROPIC_API_KEY" in env, false);
+  assert.equal(env.KEEP_ME, "1");
+});
+
+test("createProviderRunRuntime does not require session context provisioning", async () => {
+  // Built from default config plus a temp observability root; no session context is passed in.
+  const observabilityRootPath = await mkdtemp(resolve(tmpdir(), "ai-ops-provider-runtime-"));
+  const baseEnv = { PATH: "/usr/bin" };
+  const runtime = await createProviderRunRuntime({
+    provider: "claude",
+    config: loadConfig({}),
+    observabilityRootPath,
+    baseEnv,
+  });
+
+  try {
+    assert.equal(runtime.provider, "claude");
+    assert.equal(runtime.sharedEnv.PATH, "/usr/bin");
+  } finally {
+    await runtime.close(); // runs even when an assertion above fails
+  }
+});
--- a/tests/run-service.test.ts
+++ b/tests/run-service.test.ts
@@ -1,9 +1,13 @@
 import test from "node:test";
 import assert from "node:assert/strict";
-import { mkdtemp, writeFile } from "node:fs/promises";
+import { execFile } from "node:child_process";
+import { mkdtemp, mkdir, stat, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { resolve } from "node:path";
 import { UiRunService, readRunMetaBySession } from "../src/runs/run-service.js";
+import { promisify } from "node:util";
+
+const execFileAsync = promisify(execFile);

 async function waitForTerminalRun(
  runService: UiRunService,
@@ -94,3 +98,140 @@ test("run service persists failure when pipeline summary is failure", async () =
  });
  assert.equal(persisted?.status, "failure");
 });
+
+test("run service creates, runs, and closes explicit sessions", async () => {
+  const workspaceRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-run-service-session-")); // everything below lives under this temp dir
+  const stateRoot = resolve(workspaceRoot, "state");
+  const envPath = resolve(workspaceRoot, ".env");
+  const projectPath = resolve(workspaceRoot, "project");
+
+  await mkdir(projectPath, { recursive: true });
+  await execFileAsync("git", ["init", projectPath], { encoding: "utf8" }); // seed a git repo with a single commit
+  await execFileAsync("git", ["-C", projectPath, "config", "user.name", "AI Ops"], { encoding: "utf8" }); // repo-local identity for the commit below
+  await execFileAsync("git", ["-C", projectPath, "config", "user.email", "ai-ops@example.local"], { encoding: "utf8" });
+  await writeFile(resolve(projectPath, "README.md"), "# project\n", "utf8");
+  await execFileAsync("git", ["-C", projectPath, "add", "README.md"], { encoding: "utf8" });
+  await execFileAsync("git", ["-C", projectPath, "commit", "-m", "initial"], { encoding: "utf8" });
+
+  await writeFile( // env file consumed through `envFilePath` below
+    envPath,
+    [
+      `AGENT_STATE_ROOT=${stateRoot}`,
+      "AGENT_WORKTREE_ROOT=.ai_ops/worktrees",
+      "AGENT_WORKTREE_BASE_REF=HEAD",
+    ].join("\n"),
+    "utf8",
+  );
+
+  const runService = new UiRunService({
+    workspaceRoot,
+    envFilePath: ".env",
+  });
+
+  const createdSession = await runService.createSession({ // step 1: create a session for the seeded project
+    projectPath,
+  });
+  assert.equal(createdSession.sessionStatus, "active");
+
+  const manifest = { // single-node sequential pipeline
+    schemaVersion: "1",
+    topologies: ["sequential"],
+    personas: [
+      {
+        id: "writer",
+        displayName: "Writer",
+        systemPromptTemplate: "Write draft",
+        toolClearance: {
+          allowlist: ["read_file", "write_file"],
+          banlist: [],
+        },
+      },
+    ],
+    relationships: [],
+    topologyConstraints: {
+      maxDepth: 1,
+      maxRetries: 0,
+    },
+    pipeline: {
+      entryNodeId: "write-node",
+      nodes: [
+        {
+          id: "write-node",
+          actorId: "writer-actor",
+          personaId: "writer",
+        },
+      ],
+      edges: [],
+    },
+  };
+
+  const started = await runService.startRun({ // step 2: start a run inside the existing session
+    prompt: "complete task",
+    manifest,
+    sessionId: createdSession.sessionId,
+    executionMode: "mock", // NOTE(review): presumably avoids calling a real provider; confirm
+  });
+
+  const terminalStatus = await waitForTerminalRun(runService, started.runId);
+  assert.equal(terminalStatus, "success");
+
+  const closed = await runService.closeSession({ // step 3: close the session
+    sessionId: createdSession.sessionId,
+  });
+  assert.equal(closed.sessionStatus, "closed");
+
+  await assert.rejects(() => stat(createdSession.baseWorkspacePath), { // after close, the base workspace path must no longer exist
+    code: "ENOENT",
+  });
+});
+
+test("run service marks session closed_with_conflicts when close merge conflicts", async () => {
+  const workspaceRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-run-service-close-conflict-")); // everything below lives under this temp dir
+  const stateRoot = resolve(workspaceRoot, "state");
+  const envPath = resolve(workspaceRoot, ".env");
+  const projectPath = resolve(workspaceRoot, "project");
+
+  await mkdir(projectPath, { recursive: true });
+  await execFileAsync("git", ["init", projectPath], { encoding: "utf8" }); // seed a git repo whose README.md contains "base"
+  await execFileAsync("git", ["-C", projectPath, "config", "user.name", "AI Ops"], { encoding: "utf8" });
+  await execFileAsync("git", ["-C", projectPath, "config", "user.email", "ai-ops@example.local"], { encoding: "utf8" });
+  await writeFile(resolve(projectPath, "README.md"), "base\n", "utf8");
+  await execFileAsync("git", ["-C", projectPath, "add", "README.md"], { encoding: "utf8" });
+  await execFileAsync("git", ["-C", projectPath, "commit", "-m", "initial"], { encoding: "utf8" });
+
+  await writeFile( // env file consumed through `envFilePath` below
+    envPath,
+    [
+      `AGENT_STATE_ROOT=${stateRoot}`,
+      "AGENT_WORKTREE_ROOT=.ai_ops/worktrees",
+      "AGENT_WORKTREE_BASE_REF=HEAD",
+    ].join("\n"),
+    "utf8",
+  );
+
+  const runService = new UiRunService({
+    workspaceRoot,
+    envFilePath: ".env",
+  });
+
+  const createdSession = await runService.createSession({
+    projectPath,
+  });
+
+  await writeFile(resolve(createdSession.baseWorkspacePath, "README.md"), "base branch update\n", "utf8"); // side 1: commit a README change in the session's base workspace
+  await execFileAsync("git", ["-C", createdSession.baseWorkspacePath, "add", "README.md"], { encoding: "utf8" });
+  await execFileAsync("git", ["-C", createdSession.baseWorkspacePath, "commit", "-m", "base update"], { encoding: "utf8" });
+
+  await writeFile(resolve(projectPath, "README.md"), "project branch update\n", "utf8"); // side 2: commit a different change to the same file in the project
+  await execFileAsync("git", ["-C", projectPath, "add", "README.md"], { encoding: "utf8" });
+  await execFileAsync("git", ["-C", projectPath, "commit", "-m", "project update"], { encoding: "utf8" });
+
+  const closed = await runService.closeSession({
+    sessionId: createdSession.sessionId,
+    mergeToProject: true, // request the merge that the two commits above make conflict
+  });
+
+  assert.equal(closed.sessionStatus, "closed_with_conflicts");
+  const baseWorkspaceStats = await stat(createdSession.baseWorkspacePath); // unlike a clean close, the base workspace must still exist
+  assert.equal(baseWorkspaceStats.isDirectory(), true);
+});
--- a/tests/security-middleware.test.ts
+++ b/tests/security-middleware.test.ts
@@ -111,6 +111,42 @@ test("rules engine enforces binary allowlist, tool policy, and path boundaries",
  );
 });

+test("rules engine dangerous_warn_only logs but does not block violating shell commands", async () => {
+  // Policy under test: `git` is the only allowed binary and violations are handled as warn-only.
+  const worktreeRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-security-warn-worktree-"));
+  const stateRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-security-warn-state-"));
+  const projectContextPath = resolve(stateRoot, "project-context.json");
+
+  const rules = new SecurityRulesEngine(
+    {
+      allowedBinaries: ["git"],
+      worktreeRoot,
+      protectedPaths: [stateRoot, projectContextPath],
+      requireCwdWithinWorktree: true,
+      rejectRelativePathTraversal: true,
+      enforcePathBoundaryOnArguments: true,
+      allowedEnvAssignments: [],
+      blockedEnvAssignments: [],
+    },
+    undefined, // no audit event callback is supplied
+    {
+      violationHandling: "dangerous_warn_only",
+    },
+  );
+
+  // `unauthorized_bin` is neither an allowed binary nor in the tool allowlist.
+  const { cwd, parsed } = await rules.validateShellCommand({
+    command: "unauthorized_bin --version",
+    cwd: worktreeRoot,
+    toolClearance: { allowlist: ["git"], banlist: [] },
+  });
+
+  // The call resolves instead of throwing, and the parsed command list comes back empty.
+  assert.equal(cwd, worktreeRoot);
+  assert.equal(parsed.commandCount, 0);
+  assert.deepEqual(parsed.commands, []);
+});
+
 test("secure executor runs with explicit env policy", async () => {
  const worktreeRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-security-exec-"));

@@ -155,3 +191,85 @@ test("secure executor runs with explicit env policy", async () => {
  assert.equal(result.stdout, "ok|\n");
  assert.equal(streamedStdout, result.stdout);
 });
+
+test("rules engine carries session context in tool audit events", () => {
+  // Sink for whatever the engine passes to the callback that is supplied
+  // as its second constructor argument.
+  const captured: Array<Record<string, unknown>> = [];
+  const engine = new SecurityRulesEngine(
+    {
+      allowedBinaries: ["git"],
+      worktreeRoot: "/tmp",
+      protectedPaths: [],
+      requireCwdWithinWorktree: true,
+      rejectRelativePathTraversal: true,
+      enforcePathBoundaryOnArguments: true,
+      allowedEnvAssignments: [],
+      blockedEnvAssignments: [],
+    },
+    (event) => {
+      // Cast via `unknown` so each event can be inspected as a loose key/value record.
+      captured.push(event as unknown as Record<string, unknown>);
+    },
+  );
+
+  // `git` is allowlisted and not banned, so this call is expected to pass and emit an event.
+  engine.assertToolInvocationAllowed({
+    tool: "git",
+    toolClearance: { allowlist: ["git"], banlist: [] },
+    context: { sessionId: "session-ctx", nodeId: "node-ctx", attempt: 2 },
+  });
+
+  // The "allowed" event must echo the caller-supplied context fields at its top level.
+  const allowedEvent = captured.find(({ type }) => type === "tool.invocation_allowed");
+  assert.ok(allowedEvent);
+
+  const { sessionId, nodeId, attempt } = allowedEvent;
+  assert.equal(sessionId, "session-ctx");
+  assert.equal(nodeId, "node-ctx");
+  assert.equal(attempt, 2);
+});
+
+test("rules engine applies tool clearance matching case-insensitively", () => {
+  const engine = new SecurityRulesEngine({
+    allowedBinaries: ["git"],
+    worktreeRoot: "/tmp",
+    protectedPaths: [],
+    requireCwdWithinWorktree: true,
+    rejectRelativePathTraversal: true,
+    enforcePathBoundaryOnArguments: true,
+    allowedEnvAssignments: [],
+    blockedEnvAssignments: [],
+  });
+
+  // "Bash" differs from the allowlisted "bash" only by case and must be accepted.
+  assert.doesNotThrow(() => {
+    engine.assertToolInvocationAllowed({
+      tool: "Bash",
+      toolClearance: { allowlist: ["bash", "glob"], banlist: [] },
+    });
+  });
+
+  // "Glob" is allowlisted as "glob" but banned as "GLOB"; the call must throw TOOL_BANNED.
+  assert.throws(
+    () => {
+      engine.assertToolInvocationAllowed({
+        tool: "Glob",
+        toolClearance: { allowlist: ["bash", "glob"], banlist: ["GLOB"] },
+      });
+    },
+    (error: unknown) => {
+      if (!(error instanceof SecurityViolationError)) {
+        return false;
+      }
+      return error.code === "TOOL_BANNED";
+    },
+  );
+
+  // Filtering returns the caller's casing and drops the banned "Glob" and the unlisted "Read".
+  const permitted = engine.filterAllowedTools(["Bash", "Glob", "Read"], {
+    allowlist: ["bash", "glob"],
+    banlist: ["gLoB"],
+  });
+  assert.deepEqual(permitted, ["Bash"]);
+});
--- a/tests/session-lifecycle.test.ts
+++ b/tests/session-lifecycle.test.ts
@@ -0,0 +1,287 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { execFile } from "node:child_process";
+import { mkdtemp, mkdir, readFile, rm, stat, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { resolve } from "node:path";
+import { promisify } from "node:util";
+import {
+  FileSystemSessionMetadataStore,
+  SessionWorktreeManager,
+  type SessionMetadata,
+} from "../src/agents/session-lifecycle.js";
+
+const execFileAsync = promisify(execFile); // Promise-returning execFile; used by the git() helper in this file.
+
+/** Runs `git` with `args` and resolves with its stdout, whitespace-trimmed at both ends. */
+async function git(args: string[]): Promise<string> {
+  const result = await execFileAsync("git", args, { encoding: "utf8" });
+  const output = result.stdout;
+  return output.trim();
+}
+
+test("session metadata store persists and updates session metadata", async (t) => {
+  const stateRoot = await mkdtemp(resolve(tmpdir(), "ai-ops-session-store-"));
+  // Remove the temp state directory once the test is over so runs do not pile up in tmpdir.
+  t.after(() => rm(stateRoot, { recursive: true, force: true }));
+  const store = new FileSystemSessionMetadataStore({ stateRoot });
+
+  // A newly created session is expected to come back with the "active" status.
+  const created = await store.createSession({
+    sessionId: "session-abc",
+    projectPath: resolve(stateRoot, "project"),
+    baseWorkspacePath: resolve(stateRoot, "worktrees", "session-abc", "base"),
+  });
+  assert.equal(created.sessionStatus, "active");
+  assert.equal(created.sessionId, "session-abc");
+
+  const listed = await store.listSessions();
+  assert.equal(listed.length, 1);
+  assert.equal(listed[0]?.sessionId, "session-abc");
+
+  // A status update must be visible both in the returned record and on a fresh read.
+  const updated = await store.updateSession("session-abc", { sessionStatus: "closed" });
+  assert.equal(updated.sessionStatus, "closed");
+  const readBack = await store.readSession("session-abc");
+  assert.equal(readBack?.sessionStatus, "closed");
+
+  const closedWithConflicts = await store.updateSession("session-abc", {
+    sessionStatus: "closed_with_conflicts",
+  });
+  assert.equal(closedWithConflicts.sessionStatus, "closed_with_conflicts");
+});
+
+test("session worktree manager provisions and merges task worktrees", async (t) => {
+  const root = await mkdtemp(resolve(tmpdir(), "ai-ops-session-worktree-"));
+  // Remove the scratch repository and everything created under it once the test is over.
+  t.after(() => rm(root, { recursive: true, force: true }));
+  const projectPath = resolve(root, "project");
+  const worktreeRoot = resolve(root, "worktrees");
+
+  // Seed a git repository whose only commit adds README.md.
+  await mkdir(projectPath, { recursive: true });
+  await git(["init", projectPath]);
+  await git(["-C", projectPath, "config", "user.name", "AI Ops"]);
+  await git(["-C", projectPath, "config", "user.email", "ai-ops@example.local"]);
+  await writeFile(resolve(projectPath, "README.md"), "# project\n", "utf8");
+  await git(["-C", projectPath, "add", "README.md"]);
+  await git(["-C", projectPath, "commit", "-m", "initial commit"]);
+
+  const manager = new SessionWorktreeManager({ worktreeRoot, baseRef: "HEAD" });
+  const sessionId = "session-1";
+  const baseWorkspacePath = manager.resolveBaseWorkspacePath(sessionId);
+
+  // Initializing the session must leave a directory at the base workspace path.
+  await manager.initializeSessionBaseWorkspace({
+    sessionId,
+    projectPath,
+    baseWorkspacePath,
+  });
+  const baseStats = await stat(baseWorkspacePath);
+  assert.equal(baseStats.isDirectory(), true);
+
+  // Only the worktree path is needed from the ensureTaskWorktree result here.
+  const { taskWorktreePath } = await manager.ensureTaskWorktree({
+    sessionId,
+    taskId: "task-1",
+    baseWorkspacePath,
+  });
+
+  // A file written in the task worktree must appear in the base workspace after the merge.
+  await writeFile(resolve(taskWorktreePath, "feature.txt"), "task output\n", "utf8");
+  const mergeOutcome = await manager.mergeTaskIntoBase({
+    taskId: "task-1",
+    baseWorkspacePath,
+    taskWorktreePath,
+  });
+  assert.equal(mergeOutcome.kind, "success");
+
+  const mergedFile = await readFile(resolve(baseWorkspacePath, "feature.txt"), "utf8");
+  assert.equal(mergedFile, "task output\n");
+
+  // closeSession takes a SessionMetadata record, so one is assembled by hand.
+  const session: SessionMetadata = {
+    sessionId,
+    projectPath,
+    baseWorkspacePath,
+    sessionStatus: "active",
+    createdAt: new Date().toISOString(),
+    updatedAt: new Date().toISOString(),
+  };
+
+  // Closing without merging into the project must succeed and delete the base workspace.
+  const closeOutcome = await manager.closeSession({
+    session,
+    taskWorktreePaths: [],
+    mergeBaseIntoProject: false,
+  });
+  assert.equal(closeOutcome.kind, "success");
+
+  await assert.rejects(() => stat(baseWorkspacePath), {
+    code: "ENOENT",
+  });
+});
+
+test("session worktree manager returns conflict outcome instead of throwing", async (t) => {
+  const root = await mkdtemp(resolve(tmpdir(), "ai-ops-session-worktree-conflict-"));
+  // Remove the scratch repository and everything created under it once the test is over.
+  t.after(() => rm(root, { recursive: true, force: true }));
+  const projectPath = resolve(root, "project");
+  const worktreeRoot = resolve(root, "worktrees");
+
+  // Seed a git repository whose only commit adds a README.md containing "base".
+  await mkdir(projectPath, { recursive: true });
+  await git(["init", projectPath]);
+  await git(["-C", projectPath, "config", "user.name", "AI Ops"]);
+  await git(["-C", projectPath, "config", "user.email", "ai-ops@example.local"]);
+  await writeFile(resolve(projectPath, "README.md"), "base\n", "utf8");
+  await git(["-C", projectPath, "add", "README.md"]);
+  await git(["-C", projectPath, "commit", "-m", "initial commit"]);
+
+  const manager = new SessionWorktreeManager({ worktreeRoot, baseRef: "HEAD" });
+  const sessionId = "session-conflict-1";
+  const baseWorkspacePath = manager.resolveBaseWorkspacePath(sessionId);
+  await manager.initializeSessionBaseWorkspace({
+    sessionId,
+    projectPath,
+    baseWorkspacePath,
+  });
+
+  const { taskWorktreePath } = await manager.ensureTaskWorktree({
+    sessionId,
+    taskId: "task-conflict",
+    baseWorkspacePath,
+  });
+
+  // Commit a README.md change in the base workspace after the task worktree was created...
+  await writeFile(resolve(baseWorkspacePath, "README.md"), "base branch change\n", "utf8");
+  await git(["-C", baseWorkspacePath, "add", "README.md"]);
+  await git(["-C", baseWorkspacePath, "commit", "-m", "base update"]);
+
+  // ...and write a competing change to the same file in the task worktree (left uncommitted here).
+  await writeFile(resolve(taskWorktreePath, "README.md"), "task branch change\n", "utf8");
+
+  const mergeOutcome = await manager.mergeTaskIntoBase({
+    taskId: "task-conflict",
+    baseWorkspacePath,
+    taskWorktreePath,
+  });
+
+  // The conflict must be reported through the resolved value, not as a rejection.
+  assert.equal(mergeOutcome.kind, "conflict");
+  // Redundant at runtime after the assertion above; presumably kept to narrow the outcome type.
+  if (mergeOutcome.kind !== "conflict") {
+    throw new Error("Expected merge conflict outcome.");
+  }
+  assert.equal(mergeOutcome.taskId, "task-conflict");
+  assert.equal(mergeOutcome.worktreePath, taskWorktreePath);
+  assert.ok(mergeOutcome.conflictFiles.includes("README.md"));
+});
+
+test("session worktree manager recreates a task worktree after stale metadata prune", async (t) => {
+  const root = await mkdtemp(resolve(tmpdir(), "ai-ops-session-worktree-prune-"));
+  // Remove the scratch repository and everything created under it once the test is over.
+  t.after(() => rm(root, { recursive: true, force: true }));
+  const projectPath = resolve(root, "project");
+  const worktreeRoot = resolve(root, "worktrees");
+
+  // Seed a git repository whose only commit adds README.md.
+  await mkdir(projectPath, { recursive: true });
+  await git(["init", projectPath]);
+  await git(["-C", projectPath, "config", "user.name", "AI Ops"]);
+  await git(["-C", projectPath, "config", "user.email", "ai-ops@example.local"]);
+  await writeFile(resolve(projectPath, "README.md"), "# project\n", "utf8");
+  await git(["-C", projectPath, "add", "README.md"]);
+  await git(["-C", projectPath, "commit", "-m", "initial commit"]);
+
+  const manager = new SessionWorktreeManager({ worktreeRoot, baseRef: "HEAD" });
+
+  const sessionId = "session-prune-1";
+  const taskId = "task-prune-1";
+  const baseWorkspacePath = manager.resolveBaseWorkspacePath(sessionId);
+
+  await manager.initializeSessionBaseWorkspace({
+    sessionId,
+    projectPath,
+    baseWorkspacePath,
+  });
+
+  const { taskWorktreePath: initialTaskWorktreePath } = await manager.ensureTaskWorktree({
+    sessionId,
+    taskId,
+    baseWorkspacePath,
+  });
+
+  // Remove the worktree directory straight from disk, without involving git; per the test
+  // title this is meant to leave stale worktree metadata behind.
+  await rm(initialTaskWorktreePath, { recursive: true, force: true });
+
+  // Asking for the same session/task again must recreate the worktree rather than fail.
+  const { taskWorktreePath: recreatedTaskWorktreePath } = await manager.ensureTaskWorktree({
+    sessionId,
+    taskId,
+    baseWorkspacePath,
+  });
+
+  // The same path must be handed back, and it must exist on disk again.
+  assert.equal(recreatedTaskWorktreePath, initialTaskWorktreePath);
+  const stats = await stat(recreatedTaskWorktreePath);
+  assert.equal(stats.isDirectory(), true);
+});
+
+test("session worktree manager applies target path sparse checkout and task working directory", async (t) => {
+  const root = await mkdtemp(resolve(tmpdir(), "ai-ops-session-worktree-target-"));
+  // Remove the scratch repository and everything created under it once the test is over.
+  t.after(() => rm(root, { recursive: true, force: true }));
+  const projectPath = resolve(root, "project");
+  const worktreeRoot = resolve(root, "worktrees");
+
+  // Seed a repository with two top-level directories: app/ (the target path) and infra/.
+  await mkdir(resolve(projectPath, "app", "src"), { recursive: true });
+  await mkdir(resolve(projectPath, "infra"), { recursive: true });
+  await git(["init", projectPath]);
+  await git(["-C", projectPath, "config", "user.name", "AI Ops"]);
+  await git(["-C", projectPath, "config", "user.email", "ai-ops@example.local"]);
+  await writeFile(resolve(projectPath, "app", "src", "index.ts"), "export const app = true;\n", "utf8");
+  await writeFile(resolve(projectPath, "infra", "notes.txt"), "infra\n", "utf8");
+  await git(["-C", projectPath, "add", "."]);
+  await git(["-C", projectPath, "commit", "-m", "initial commit"]);
+
+  const manager = new SessionWorktreeManager({ worktreeRoot, baseRef: "HEAD", targetPath: "app" });
+  const sessionId = "session-target-1";
+  const baseWorkspacePath = manager.resolveBaseWorkspacePath(sessionId);
+  await manager.initializeSessionBaseWorkspace({
+    sessionId,
+    projectPath,
+    baseWorkspacePath,
+  });
+
+  // With targetPath "app", the working directory must be the app/ folder inside the workspace.
+  const baseWorkingDirectory = manager.resolveWorkingDirectoryForWorktree(baseWorkspacePath);
+  assert.equal(baseWorkingDirectory, resolve(baseWorkspacePath, "app"));
+  const baseWorkingStats = await stat(baseWorkingDirectory);
+  assert.equal(baseWorkingStats.isDirectory(), true);
+  // The non-targeted infra/ directory must not be present in the base workspace.
+  await assert.rejects(() => stat(resolve(baseWorkspacePath, "infra")), { code: "ENOENT" });
+
+  // A task worktree must report a working directory at app/ inside its own worktree path.
+  const { taskWorktreePath, taskWorkingDirectory } = await manager.ensureTaskWorktree({
+    sessionId,
+    taskId: "task-target-1",
+    baseWorkspacePath,
+  });
+  assert.equal(taskWorkingDirectory, resolve(taskWorktreePath, "app"));
+
+  // A file added under the task's working directory must reach the base working directory on merge.
+  await writeFile(resolve(taskWorkingDirectory, "src", "feature.ts"), "export const feature = true;\n", "utf8");
+
+  const mergeOutcome = await manager.mergeTaskIntoBase({
+    taskId: "task-target-1",
+    baseWorkspacePath,
+    taskWorktreePath,
+  });
+  assert.equal(mergeOutcome.kind, "success");
+
+  const merged = await readFile(resolve(baseWorkingDirectory, "src", "feature.ts"), "utf8");
+  assert.equal(merged, "export const feature = true;\n");
+});