Add per-part PII/secrets detection for multimodal messages (#47)

author Stefan Gasser <redacted>

Sat, 17 Jan 2026 19:32:54 +0000 (20:32 +0100)

committer GitHub <redacted>

Sat, 17 Jan 2026 19:32:54 +0000 (20:32 +0100)
author Stefan Gasser <redacted>
Sat, 17 Jan 2026 19:32:54 +0000 (20:32 +0100)
committer GitHub <redacted>
Sat, 17 Jan 2026 19:32:54 +0000 (20:32 +0100)
diff --git a/config.example.yaml b/config.example.yaml

index 6f09440cee76805cf44af83955321ffb6c0fd8f7..ec1b4e39807d59e9c9f1ddff033447b35655e930 100644 (file)
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -86,10 +86,10 @@ secrets_detection:
    enabled: true
  
    # Action to take when secrets are detected:
-  #   redact:       Replace secrets with placeholders, unmask in response (default)
+  #   mask:         Replace secrets with placeholders, unmask in response (default)
    #   block:        Block the request with HTTP 400
    #   route_local:  Route to local provider (only works in route mode)
-  action: redact
+  action: mask
  
    # Secret types to detect
    # Private Keys (enabled by default):
diff --git a/docs/api-reference/chat-completions.mdx b/docs/api-reference/chat-completions.mdx

index cd922be3921a910b62bfb666517574428534e56d..d98673f7b902612160ae121c43048e10d3c9b0a3 100644 (file)
--- a/docs/api-reference/chat-completions.mdx
+++ b/docs/api-reference/chat-completions.mdx
@@ -123,4 +123,4 @@ PasteGuard adds headers to indicate PII and secrets handling:
  | `X-PasteGuard-Language-Fallback` | `true` if configured language was not available |
  | `X-PasteGuard-Secrets-Detected` | `true` if secrets were found |
  | `X-PasteGuard-Secrets-Types` | Comma-separated list of detected secret types |
-| `X-PasteGuard-Secrets-Redacted` | `true` if secrets were redacted |
+| `X-PasteGuard-Secrets-Masked` | `true` if secrets were masked |
diff --git a/docs/concepts/secrets-detection.mdx b/docs/concepts/secrets-detection.mdx

index 0966b0164744e021dcea58465c9a707c74ff9e6d..0daca06d189d41430bd5609d5eeb0646c6b7b19f 100644 (file)
--- a/docs/concepts/secrets-detection.mdx
+++ b/docs/concepts/secrets-detection.mdx
@@ -5,7 +5,7 @@ description: Detect and protect private keys, API keys, tokens, and environment
  
  # Secrets Detection
  
-PasteGuard detects secrets before PII detection and can block, redact, or route requests containing sensitive credentials.
+PasteGuard detects secrets before PII detection and can block, mask, or route requests containing sensitive credentials.
  
  ## Supported Secret Types
  
@@ -43,15 +43,15 @@ PasteGuard detects secrets before PII detection and can block, redact, or route
  
  | Action | Description |
  |--------|-------------|
-| `redact` | Replace secrets with placeholders, restore in response (default) |
+| `mask` | Replace secrets with placeholders, restore in response (default) |
  | `block` | Return HTTP 400, request never reaches LLM |
  | `route_local` | Route to local LLM (requires route mode) |
  
-### Redact (Default)
+### Mask (Default)
  
  ```yaml
  secrets_detection:
-  action: redact
+  action: mask
  ```
  
  Secrets are replaced with placeholders and restored in the response (like PII masking).
@@ -85,8 +85,8 @@ X-PasteGuard-Secrets-Detected: true
  X-PasteGuard-Secrets-Types: OPENSSH_PRIVATE_KEY,API_KEY_OPENAI
  ```
  
-If secrets were redacted:
+If secrets were masked:
  
  ```
-X-PasteGuard-Secrets-Redacted: true
+X-PasteGuard-Secrets-Masked: true
  ```
diff --git a/docs/configuration/secrets-detection.mdx b/docs/configuration/secrets-detection.mdx

index c81352370af4d3996a5c7d51f83f913ccb36ce95..d5b3f62691ac96504461bd1c35fc955cd99b4164 100644 (file)
--- a/docs/configuration/secrets-detection.mdx
+++ b/docs/configuration/secrets-detection.mdx
@@ -8,7 +8,7 @@ description: Configure detection of private keys, API keys, tokens, and environm
  ```yaml
  secrets_detection:
    enabled: true
-  action: redact
+  action: mask
    entities:
      - OPENSSH_PRIVATE_KEY
      - PEM_PRIVATE_KEY
@@ -21,7 +21,7 @@ secrets_detection:
  | Option | Default | Description |
  |--------|---------|-------------|
  | `enabled` | `true` | Enable secrets detection |
-| `action` | `redact` | Action when secrets found |
+| `action` | `mask` | Action when secrets found |
  | `entities` | Private keys | Secret types to detect |
  | `max_scan_chars` | `200000` | Max characters to scan (0 = unlimited) |
  | `log_detected_types` | `true` | Log detected types (never logs content) |
@@ -30,15 +30,15 @@ secrets_detection:
  
  | Action | Description |
  |--------|-------------|
-| `redact` | Replace secrets with placeholders, restore in response (default) |
+| `mask` | Replace secrets with placeholders, restore in response (default) |
  | `block` | Return HTTP 400, request never reaches LLM |
  | `route_local` | Route to local LLM (requires route mode) |
  
-### Redact (Default)
+### Mask (Default)
  
  ```yaml
  secrets_detection:
-  action: redact
+  action: mask
  ```
  
  ### Block
diff --git a/src/config.ts b/src/config.ts

index 88698f1620405dc507a33dcf3c3ce82db8b94937..461c188f5a8c5f61b483e5581dc089ded204958e 100644 (file)
--- a/src/config.ts
+++ b/src/config.ts
@@ -1,6 +1,7 @@
  import { existsSync, readFileSync, statSync } from "node:fs";
  import { parse as parseYaml } from "yaml";
  import { z } from "zod";
+import { SUPPORTED_LANGUAGES } from "./constants/languages";
  
  // Schema definitions
  
@@ -23,36 +24,7 @@ const MaskingSchema = z.object({
    marker_text: z.string().default("[protected]"),
  });
  
-// All 25 spaCy languages with trained pipelines
-// See docker/presidio/languages.yaml for full list
-const SupportedLanguages = [
-  "ca", // Catalan
-  "zh", // Chinese
-  "hr", // Croatian
-  "da", // Danish
-  "nl", // Dutch
-  "en", // English
-  "fi", // Finnish
-  "fr", // French
-  "de", // German
-  "el", // Greek
-  "it", // Italian
-  "ja", // Japanese
-  "ko", // Korean
-  "lt", // Lithuanian
-  "mk", // Macedonian
-  "nb", // Norwegian
-  "pl", // Polish
-  "pt", // Portuguese
-  "ro", // Romanian
-  "ru", // Russian
-  "sl", // Slovenian
-  "es", // Spanish
-  "sv", // Swedish
-  "uk", // Ukrainian
-] as const;
-
-const LanguageEnum = z.enum(SupportedLanguages);
+const LanguageEnum = z.enum(SUPPORTED_LANGUAGES);
  
  // Accept either array or comma-separated string for languages
  // This allows using env vars like PASTEGUARD_LANGUAGES=en,de,fr
@@ -60,7 +32,7 @@ const LanguagesSchema = z
    .union([z.array(LanguageEnum), z.string()])
    .transform((val) => {
      if (Array.isArray(val)) return val;
-    return val.split(",").map((s) => s.trim()) as (typeof SupportedLanguages)[number][];
+    return val.split(",").map((s) => s.trim()) as (typeof SUPPORTED_LANGUAGES)[number][];
    })
    .pipe(z.array(LanguageEnum))
    .default(["en"]);
@@ -121,7 +93,7 @@ const SecretEntityTypes = [
  
  const SecretsDetectionSchema = z.object({
    enabled: z.boolean().default(true),
-  action: z.enum(["block", "redact", "route_local"]).default("redact"),
+  action: z.enum(["block", "mask", "route_local"]).default("mask"),
    entities: z.array(z.enum(SecretEntityTypes)).default(["OPENSSH_PRIVATE_KEY", "PEM_PRIVATE_KEY"]),
    max_scan_chars: z.coerce.number().int().min(0).default(200000),
    log_detected_types: z.boolean().default(true),
@@ -165,7 +137,7 @@ const ConfigSchema = z
      },
      {
        message:
-        "secrets_detection.action 'route_local' is not compatible with mode 'mask'. Use mode 'route' or change secrets_detection.action to 'block' or 'redact'",
+        "secrets_detection.action 'route_local' is not compatible with mode 'mask'. Use mode 'route' or change secrets_detection.action to 'block' or 'mask'",
      },
    );
  
diff --git a/src/constants/languages.ts b/src/constants/languages.ts

new file mode 100644 (file)

index 0000000..56b214f
--- /dev/null
+++ b/src/constants/languages.ts
@@ -0,0 +1,32 @@
+/**
+ * All 24 spaCy languages with trained pipelines
+ * See docker/presidio/languages.yaml for full list
+ */
+export const SUPPORTED_LANGUAGES = [
+  "ca", // Catalan
+  "zh", // Chinese
+  "hr", // Croatian
+  "da", // Danish
+  "nl", // Dutch
+  "en", // English
+  "fi", // Finnish
+  "fr", // French
+  "de", // German
+  "el", // Greek
+  "it", // Italian
+  "ja", // Japanese
+  "ko", // Korean
+  "lt", // Lithuanian
+  "mk", // Macedonian
+  "nb", // Norwegian
+  "pl", // Polish
+  "pt", // Portuguese
+  "ro", // Romanian
+  "ru", // Russian
+  "sl", // Slovenian
+  "es", // Spanish
+  "sv", // Swedish
+  "uk", // Ukrainian
+] as const;
+
+export type SupportedLanguage = (typeof SUPPORTED_LANGUAGES)[number];
diff --git a/src/index.ts b/src/index.ts

index 003ea381f6cea5c67cc270f51a65a6bc43fcbb6e..c04113557595bae6ed2a5fa1d298370df21ee8ba 100644 (file)
--- a/src/index.ts
+++ b/src/index.ts
@@ -4,12 +4,12 @@ import { createMiddleware } from "hono/factory";
  import { HTTPException } from "hono/http-exception";
  import { logger } from "hono/logger";
  import { getConfig } from "./config";
+import { getPIIDetector } from "./pii/detect";
  import { dashboardRoutes } from "./routes/dashboard";
  import { healthRoutes } from "./routes/health";
  import { infoRoutes } from "./routes/info";
  import { proxyRoutes } from "./routes/proxy";
  import { getLogger } from "./services/logger";
-import { getPIIDetector } from "./services/pii-detector";
  
  type Variables = {
    requestId: string;
@@ -106,9 +106,7 @@ async function validateStartup() {
    if (config.secrets_detection.action === "route_local" && config.mode === "mask") {
      console.error("\n❌ Configuration error detected!\n");
      console.error("   secrets_detection.action 'route_local' is not compatible with mode 'mask'.");
-    console.error(
-      "   Use mode 'route' or change secrets_detection.action to 'block' or 'redact'.\n",
-    );
+    console.error("   Use mode 'route' or change secrets_detection.action to 'block' or 'mask'.\n");
      console.error("[STARTUP] ✗ Invalid configuration. Exiting for safety.");
      process.exit(1);
    }
diff --git a/src/services/pii-detector.test.ts b/src/pii/detect.test.ts

similarity index 79%

rename from src/services/pii-detector.test.ts

rename to src/pii/detect.test.ts

index 6c748d73cffaefbbe149088c5d3d910e13759eb8..46be2ffed6ef22e3652d7402c6108bdac0db5d53 100644 (file)
--- a/src/services/pii-detector.test.ts
+++ b/src/pii/detect.test.ts
@@ -1,5 +1,5 @@
  import { afterEach, describe, expect, mock, test } from "bun:test";
-import { PIIDetector } from "./pii-detector";
+import { PIIDetector } from "./detect";
  
  const originalFetch = globalThis.fetch;
  
@@ -62,10 +62,16 @@ describe("PIIDetector", () => {
        const result = await detector.analyzeMessages(messages);
  
        expect(result.hasPII).toBe(true);
-      expect(result.entitiesByMessage).toHaveLength(3);
-      expect(result.entitiesByMessage[0]).toHaveLength(1);
-      expect(result.entitiesByMessage[1]).toHaveLength(1);
-      expect(result.entitiesByMessage[2]).toHaveLength(1);
+      // Per-message, per-part: messageEntities[msgIdx][partIdx] = entities
+      expect(result.messageEntities).toHaveLength(3);
+      // Each message has 1 part (string content)
+      expect(result.messageEntities[0]).toHaveLength(1);
+      expect(result.messageEntities[1]).toHaveLength(1);
+      expect(result.messageEntities[2]).toHaveLength(1);
+      // Each part has 1 entity
+      expect(result.messageEntities[0][0]).toHaveLength(1);
+      expect(result.messageEntities[1][0]).toHaveLength(1);
+      expect(result.messageEntities[2][0]).toHaveLength(1);
      });
  
      test("detects PII in system message when user message has none", async () => {
@@ -82,8 +88,8 @@ describe("PIIDetector", () => {
        const result = await detector.analyzeMessages(messages);
  
        expect(result.hasPII).toBe(true);
-      expect(result.entitiesByMessage[0]).toHaveLength(1);
-      expect(result.entitiesByMessage[0][0].entity_type).toBe("PERSON");
+      expect(result.messageEntities[0][0]).toHaveLength(1);
+      expect(result.messageEntities[0][0][0].entity_type).toBe("PERSON");
      });
  
      test("detects PII in earlier user message", async () => {
@@ -101,7 +107,7 @@ describe("PIIDetector", () => {
        const result = await detector.analyzeMessages(messages);
  
        expect(result.hasPII).toBe(true);
-      expect(result.entitiesByMessage[0]).toHaveLength(1);
+      expect(result.messageEntities[0][0]).toHaveLength(1);
      });
  
      test("returns empty result for no messages", async () => {
@@ -111,8 +117,8 @@ describe("PIIDetector", () => {
        const result = await detector.analyzeMessages([]);
  
        expect(result.hasPII).toBe(false);
-      expect(result.entitiesByMessage).toHaveLength(0);
-      expect(result.newEntities).toHaveLength(0);
+      expect(result.messageEntities).toHaveLength(0);
+      expect(result.allEntities).toHaveLength(0);
      });
  
      test("handles multimodal content", async () => {
@@ -134,7 +140,12 @@ describe("PIIDetector", () => {
        const result = await detector.analyzeMessages(messages);
  
        expect(result.hasPII).toBe(true);
-      expect(result.entitiesByMessage[0]).toHaveLength(1);
+      // Multimodal message has 2 parts
+      expect(result.messageEntities[0]).toHaveLength(2);
+      // First part (text) has 1 entity
+      expect(result.messageEntities[0][0]).toHaveLength(1);
+      // Second part (image) has no entities
+      expect(result.messageEntities[0][1]).toHaveLength(0);
      });
  
      test("skips messages with empty content", async () => {
@@ -150,8 +161,10 @@ describe("PIIDetector", () => {
  
        const result = await detector.analyzeMessages(messages);
  
-      expect(result.entitiesByMessage).toHaveLength(2);
-      expect(result.entitiesByMessage[0]).toHaveLength(0);
+      expect(result.messageEntities).toHaveLength(2);
+      // First message (empty string) has 1 part with no entities
+      expect(result.messageEntities[0]).toHaveLength(1);
+      expect(result.messageEntities[0][0]).toHaveLength(0);
      });
    });
  
diff --git a/src/services/pii-detector.ts b/src/pii/detect.ts

similarity index 75%

rename from src/services/pii-detector.ts

rename to src/pii/detect.ts

index 444f1309a86d9c7facc1591c14b601a9fdc9ac46..ae078f51210800372841471ec91e08572040eb74 100644 (file)
--- a/src/services/pii-detector.ts
+++ b/src/pii/detect.ts
@@ -1,6 +1,6 @@
  import { getConfig } from "../config";
+import { getLanguageDetector, type SupportedLanguage } from "../services/language-detector";
  import { extractTextContent, type MessageContent } from "../utils/content";
-import { getLanguageDetector, type SupportedLanguage } from "./language-detector";
  
  export interface PIIEntity {
    entity_type: string;
@@ -16,10 +16,16 @@ interface AnalyzeRequest {
    score_threshold?: number;
  }
  
+/**
+ * Per-message, per-part PII detection result
+ * Structure: messageEntities[msgIdx][partIdx] = entities for that part
+ */
  export interface PIIDetectionResult {
    hasPII: boolean;
-  entitiesByMessage: PIIEntity[][];
-  newEntities: PIIEntity[];
+  /** Per-message, per-part entities */
+  messageEntities: PIIEntity[][][];
+  /** Flattened list of all entities (for summary/logging) */
+  allEntities: PIIEntity[];
    scanTimeMs: number;
    language: SupportedLanguage;
    languageFallback: boolean;
@@ -78,33 +84,65 @@ export class PIIDetector {
      }
    }
  
+  /**
+   * Analyzes messages for PII with per-part granularity
+   *
+   * For string content, entities are in messageEntities[msgIdx][0] (an empty string is not scanned).
+   * For array content (multimodal), each text part is scanned separately; non-text parts get an empty list.
+   */
    async analyzeMessages(
      messages: Array<{ role: string; content: MessageContent }>,
    ): Promise<PIIDetectionResult> {
      const startTime = Date.now();
      const config = getConfig();
  
+    // Detect language from the last user message
      const lastUserMsg = messages.findLast((m) => m.role === "user");
      const langText = lastUserMsg ? extractTextContent(lastUserMsg.content) : "";
      const langResult = langText
        ? getLanguageDetector().detect(langText)
        : { language: config.pii_detection.fallback_language, usedFallback: true };
  
-    const scannedRoles = ["system", "developer", "user", "assistant"];
+    const scannedRoles = ["system", "developer", "user", "assistant", "tool"];
  
-    const entitiesByMessage = await Promise.all(
-      messages.map((message) => {
-        const text = extractTextContent(message.content);
-        return text && scannedRoles.includes(message.role)
-          ? this.detectPII(text, langResult.language)
-          : Promise.resolve([]);
+    // Detect PII per message, per content part
+    const messageEntities: PIIEntity[][][] = await Promise.all(
+      messages.map(async (message) => {
+        if (!scannedRoles.includes(message.role)) {
+          return [];
+        }
+
+        // String content → wrap in single-element array
+        if (typeof message.content === "string") {
+          const entities = message.content
+            ? await this.detectPII(message.content, langResult.language)
+            : [];
+          return [entities];
+        }
+
+        // Array content (multimodal) → per-part detection
+        if (Array.isArray(message.content)) {
+          return await Promise.all(
+            message.content.map(async (part) => {
+              if (part.type === "text" && typeof part.text === "string") {
+                return await this.detectPII(part.text, langResult.language);
+              }
+              return [];
+            }),
+          );
+        }
+
+        // Null/undefined content → no parts, so an empty list
+        return [];
        }),
      );
  
+    const allEntities = messageEntities.flat(2);
+
      return {
-      hasPII: entitiesByMessage.some((e) => e.length > 0),
-      entitiesByMessage,
-      newEntities: entitiesByMessage.flat(),
+      hasPII: allEntities.length > 0,
+      messageEntities,
+      allEntities,
        scanTimeMs: Date.now() - startTime,
        language: langResult.language,
        languageFallback: langResult.usedFallback,
diff --git a/src/pii/mask.test.ts b/src/pii/mask.test.ts

new file mode 100644 (file)

index 0000000..fdd582e
--- /dev/null
+++ b/src/pii/mask.test.ts
@@ -0,0 +1,347 @@
+import { describe, expect, test } from "bun:test";
+import type { MaskingConfig } from "../config";
+import type { ChatMessage } from "../services/llm-client";
+import { createPIIResult } from "../test-utils/detection-results";
+import type { PIIEntity } from "./detect";
+import {
+  createMaskingContext,
+  flushMaskingBuffer,
+  mask,
+  maskMessages,
+  unmask,
+  unmaskResponse,
+  unmaskStreamChunk,
+} from "./mask";
+
+const defaultConfig: MaskingConfig = {
+  show_markers: false,
+  marker_text: "[protected]",
+};
+
+const configWithMarkers: MaskingConfig = {
+  show_markers: true,
+  marker_text: "[protected]",
+};
+
+describe("PII placeholder format", () => {
+  test("uses [[TYPE_N]] format", () => {
+    const entities: PIIEntity[] = [{ entity_type: "EMAIL_ADDRESS", start: 0, end: 16, score: 1.0 }];
+    const result = mask("john@example.com", entities);
+
+    expect(result.masked).toBe("[[EMAIL_ADDRESS_1]]");
+  });
+
+  test("increments counter per entity type", () => {
+    const entities: PIIEntity[] = [
+      { entity_type: "EMAIL_ADDRESS", start: 0, end: 7, score: 1.0 },
+      { entity_type: "EMAIL_ADDRESS", start: 12, end: 19, score: 1.0 },
+    ];
+
+    const result = mask("a@b.com and c@d.com", entities);
+
+    expect(result.masked).toBe("[[EMAIL_ADDRESS_1]] and [[EMAIL_ADDRESS_2]]");
+  });
+
+  test("tracks different entity types separately", () => {
+    const entities: PIIEntity[] = [
+      { entity_type: "PERSON", start: 0, end: 11, score: 0.9 },
+      { entity_type: "EMAIL_ADDRESS", start: 13, end: 26, score: 1.0 },
+    ];
+
+    const result = mask("Hans Müller: hans@firma.de", entities);
+
+    expect(result.masked).toBe("[[PERSON_1]]: [[EMAIL_ADDRESS_1]]");
+  });
+});
+
+describe("marker feature", () => {
+  test("adds markers when show_markers is true", () => {
+    const context = createMaskingContext();
+    context.mapping["[[EMAIL_ADDRESS_1]]"] = "john@example.com";
+
+    const result = unmask("Email: [[EMAIL_ADDRESS_1]]", context, configWithMarkers);
+    expect(result).toBe("Email: [protected]john@example.com");
+  });
+
+  test("no markers when show_markers is false", () => {
+    const context = createMaskingContext();
+    context.mapping["[[EMAIL_ADDRESS_1]]"] = "john@example.com";
+
+    const result = unmask("Email: [[EMAIL_ADDRESS_1]]", context, defaultConfig);
+    expect(result).toBe("Email: john@example.com");
+  });
+
+  test("markers work with streaming", () => {
+    const context = createMaskingContext();
+    context.mapping["[[PERSON_1]]"] = "John Doe";
+
+    const { output } = unmaskStreamChunk("", "Hello [[PERSON_1]]!", context, configWithMarkers);
+    expect(output).toBe("Hello [protected]John Doe!");
+  });
+
+  test("markers work with response unmasking", () => {
+    const context = createMaskingContext();
+    context.mapping["[[PERSON_1]]"] = "John Doe";
+
+    const response = {
+      id: "test",
+      object: "chat.completion" as const,
+      created: 1234567890,
+      model: "gpt-4",
+      choices: [
+        {
+          index: 0,
+          message: { role: "assistant" as const, content: "Hello [[PERSON_1]]" },
+          finish_reason: "stop" as const,
+        },
+      ],
+    };
+
+    const result = unmaskResponse(response, context, configWithMarkers);
+    expect(result.choices[0].message.content).toBe("Hello [protected]John Doe");
+  });
+});
+
+describe("maskMessages with PIIDetectionResult", () => {
+  test("masks multiple messages using detection result", () => {
+    const messages: ChatMessage[] = [
+      { role: "user", content: "My email is test@example.com" },
+      { role: "assistant", content: "Got it" },
+      { role: "user", content: "Also john@test.com" },
+    ];
+
+    const detection = createPIIResult([
+      [[{ entity_type: "EMAIL_ADDRESS", start: 12, end: 28, score: 1.0 }]],
+      [[]],
+      [[{ entity_type: "EMAIL_ADDRESS", start: 5, end: 18, score: 1.0 }]],
+    ]);
+
+    const { masked, context } = maskMessages(messages, detection);
+
+    expect(masked[0].content).toBe("My email is [[EMAIL_ADDRESS_1]]");
+    expect(masked[1].content).toBe("Got it");
+    expect(masked[2].content).toBe("Also [[EMAIL_ADDRESS_2]]");
+    expect(context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("test@example.com");
+    expect(context.mapping["[[EMAIL_ADDRESS_2]]"]).toBe("john@test.com");
+  });
+
+  test("handles multimodal content", () => {
+    const messages: ChatMessage[] = [
+      {
+        role: "user",
+        content: [
+          { type: "text", text: "Contact john@test.com" },
+          { type: "image_url", image_url: { url: "https://example.com/img.jpg" } },
+        ],
+      },
+    ];
+
+    const detection = createPIIResult([
+      [[{ entity_type: "EMAIL_ADDRESS", start: 8, end: 21, score: 1.0 }], []],
+    ]);
+
+    const { masked } = maskMessages(messages, detection);
+
+    const content = masked[0].content as Array<{ type: string; text?: string }>;
+    expect(content[0].text).toBe("Contact [[EMAIL_ADDRESS_1]]");
+    expect(content[1].type).toBe("image_url");
+  });
+});
+
+describe("streaming with PII placeholders", () => {
+  test("buffers partial [[TYPE placeholder", () => {
+    const context = createMaskingContext();
+    context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
+
+    const { output, remainingBuffer } = unmaskStreamChunk(
+      "",
+      "Hello [[EMAIL_ADD",
+      context,
+      defaultConfig,
+    );
+
+    expect(output).toBe("Hello ");
+    expect(remainingBuffer).toBe("[[EMAIL_ADD");
+  });
+
+  test("completes buffered placeholder across chunks", () => {
+    const context = createMaskingContext();
+    context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
+
+    const { output, remainingBuffer } = unmaskStreamChunk(
+      "[[EMAIL_ADD",
+      "RESS_1]] there",
+      context,
+      defaultConfig,
+    );
+
+    expect(output).toBe("test@test.com there");
+    expect(remainingBuffer).toBe("");
+  });
+
+  test("flushes remaining buffer at end of stream", () => {
+    const context = createMaskingContext();
+    context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
+
+    const flushed = flushMaskingBuffer("[[EMAIL_ADD", context, defaultConfig);
+    expect(flushed).toBe("[[EMAIL_ADD");
+  });
+});
+
+describe("PII conflict resolution", () => {
+  test("handles overlapping entities with same start - keeps longer", () => {
+    const text = "Given Eric's feedback";
+    const entities: PIIEntity[] = [
+      { entity_type: "PERSON", start: 6, end: 10, score: 0.85 },
+      { entity_type: "PERSON", start: 6, end: 12, score: 0.8 },
+    ];
+
+    const { masked, context } = mask(text, entities);
+
+    expect(masked).toBe("Given [[PERSON_1]] feedback");
+    expect(context.mapping["[[PERSON_1]]"]).toBe("Eric's");
+  });
+
+  test("handles partially overlapping entities of same type - merges them", () => {
+    const text = "Contact John Smith Jones please";
+    const entities: PIIEntity[] = [
+      { entity_type: "PERSON", start: 8, end: 18, score: 0.9 },
+      { entity_type: "PERSON", start: 13, end: 25, score: 0.7 },
+    ];
+
+    const { masked } = mask(text, entities);
+
+    expect(masked).toBe("Contact [[PERSON_1]]please");
+  });
+
+  test("keeps adjacent non-overlapping entities", () => {
+    const text = "HansMüller";
+    const entities: PIIEntity[] = [
+      { entity_type: "PERSON", start: 0, end: 4, score: 0.9 },
+      { entity_type: "PERSON", start: 4, end: 10, score: 0.9 },
+    ];
+
+    const { masked } = mask(text, entities);
+
+    expect(masked).toBe("[[PERSON_1]][[PERSON_2]]");
+  });
+});
+
+describe("mask -> unmask roundtrip", () => {
+  test("preserves original data through roundtrip", () => {
+    const originalText = "Contact Hans Müller at hans@firma.de or call +49123456789";
+    const entities: PIIEntity[] = [
+      { entity_type: "PERSON", start: 8, end: 19, score: 0.9 },
+      { entity_type: "EMAIL_ADDRESS", start: 23, end: 36, score: 1.0 },
+      { entity_type: "PHONE_NUMBER", start: 45, end: 57, score: 0.95 },
+    ];
+
+    const { masked, context } = mask(originalText, entities);
+
+    expect(masked).not.toContain("Hans Müller");
+    expect(masked).not.toContain("hans@firma.de");
+    expect(masked).not.toContain("+49123456789");
+
+    const llmResponse = `I see ${masked.match(/\[\[PERSON_1\]\]/)?.[0]}, email ${masked.match(/\[\[EMAIL_ADDRESS_1\]\]/)?.[0]}`;
+    const unmasked = unmask(llmResponse, context, defaultConfig);
+
+    expect(unmasked).toContain("Hans Müller");
+    expect(unmasked).toContain("hans@firma.de");
+  });
+});
+
+describe("HTML context handling", () => {
+  test("unmasks placeholders in HTML without encoding issues", () => {
+    const context = createMaskingContext();
+    context.mapping["[[PERSON_1]]"] = "Dr. Sarah Chen";
+    context.mapping["[[EMAIL_ADDRESS_1]]"] = "sarah.chen@hospital.org";
+
+    const htmlResponse = `<p>Contact [[PERSON_1]] at [[EMAIL_ADDRESS_1]]</p>`;
+    const result = unmask(htmlResponse, context, defaultConfig);
+
+    expect(result).toBe("<p>Contact Dr. Sarah Chen at sarah.chen@hospital.org</p>");
+  });
+
+  test("works with complex HTML structures", () => {
+    const context = createMaskingContext();
+    context.mapping["[[PERSON_1]]"] = "Dr. Sarah Chen";
+    context.mapping["[[EMAIL_ADDRESS_1]]"] = "sarah@hospital.org";
+
+    const complexHtml = `
+      <div class="profile">
+        <h1>[[PERSON_1]]</h1>
+        <a href="mailto:[[EMAIL_ADDRESS_1]]">[[EMAIL_ADDRESS_1]]</a>
+      </div>
+    `;
+
+    const result = unmask(complexHtml, context, defaultConfig);
+
+    expect(result).toContain("Dr. Sarah Chen");
+    expect(result).toContain("sarah@hospital.org");
+    expect(result).not.toContain("[[");
+  });
+});
+
+describe("unmaskResponse", () => {
+  test("unmasks all choices in response", () => {
+    const context = createMaskingContext();
+    context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
+    context.mapping["[[PERSON_1]]"] = "John Doe";
+
+    const response = {
+      id: "chatcmpl-123",
+      object: "chat.completion" as const,
+      created: 1234567890,
+      model: "gpt-4",
+      choices: [
+        {
+          index: 0,
+          message: {
+            role: "assistant" as const,
+            content: "Contact [[PERSON_1]] at [[EMAIL_ADDRESS_1]]",
+          },
+          finish_reason: "stop" as const,
+        },
+      ],
+      usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
+    };
+
+    const result = unmaskResponse(response, context, defaultConfig);
+
+    expect(result.choices[0].message.content).toBe("Contact John Doe at test@test.com");
+    expect(result.id).toBe("chatcmpl-123");
+    expect(result.model).toBe("gpt-4");
+  });
+});
+
+describe("edge cases", () => {
+  test("handles unicode in masked text", () => {
+    const text = "Kontakt: François Müller";
+    const entities: PIIEntity[] = [{ entity_type: "PERSON", start: 9, end: 24, score: 0.9 }];
+
+    const { masked, context } = mask(text, entities);
+    expect(masked).toBe("Kontakt: [[PERSON_1]]");
+
+    const unmasked = unmask(masked, context, defaultConfig);
+    expect(unmasked).toBe("Kontakt: François Müller");
+  });
+
+  test("handles empty text", () => {
+    const { masked, context } = mask("", []);
+    expect(masked).toBe("");
+    expect(unmask("", context, defaultConfig)).toBe("");
+  });
+
+  test("reuses placeholder for duplicate values", () => {
+    const text = "a@b.com and again a@b.com";
+    const entities: PIIEntity[] = [
+      { entity_type: "EMAIL_ADDRESS", start: 0, end: 7, score: 1.0 },
+      { entity_type: "EMAIL_ADDRESS", start: 18, end: 25, score: 1.0 },
+    ];
+
+    const result = mask(text, entities);
+
+    expect(result.masked).toBe("[[EMAIL_ADDRESS_1]] and again [[EMAIL_ADDRESS_1]]");
+    expect(Object.keys(result.context.mapping)).toHaveLength(1);
+  });
+});
diff --git a/src/pii/mask.ts b/src/pii/mask.ts

new file mode 100644 (file)

index 0000000..13d1c13
--- /dev/null
+++ b/src/pii/mask.ts
@@ -0,0 +1,131 @@
+import type { MaskingConfig } from "../config";
+import type { ChatCompletionResponse, ChatMessage } from "../services/llm-client";
+import { resolveConflicts } from "../utils/conflict-resolver";
+import {
+  createPlaceholderContext,
+  flushBuffer,
+  incrementAndGenerate,
+  type MaskResult,
+  type PlaceholderContext,
+  processStreamChunk,
+  replaceWithPlaceholders,
+  restorePlaceholders,
+  restoreResponsePlaceholders,
+  transformMessagesPerPart,
+} from "../utils/message-transform";
+import {
+  generatePlaceholder as generatePlaceholderFromFormat,
+  PII_PLACEHOLDER_FORMAT,
+} from "../utils/placeholders";
+import type { PIIDetectionResult, PIIEntity } from "./detect";
+
+export type { MaskResult } from "../utils/message-transform";
+
+/**
+ * Creates a new masking context for a request by delegating to createPlaceholderContext()
+ */
+export function createMaskingContext(): PlaceholderContext {
+  return createPlaceholderContext();
+}
+
+/**
+ * Generates a PII placeholder (PII_PLACEHOLDER_FORMAT) using the count from incrementAndGenerate
+ */
+function generatePlaceholder(entityType: string, context: PlaceholderContext): string {
+  return incrementAndGenerate(entityType, context, (type, count) =>
+    generatePlaceholderFromFormat(PII_PLACEHOLDER_FORMAT, type, count),
+  );
+}
+
+/**
+ * Returns a formatter that prefixes marker_text when show_markers is truthy, otherwise undefined
+ */
+function getFormatValue(config: MaskingConfig): ((original: string) => string) | undefined {
+  return config.show_markers ? (original: string) => `${config.marker_text}${original}` : undefined;
+}
+
+/**
+ * Masks PII entities in text with placeholders, reusing context when given or creating a new one
+ */
+export function mask(
+  text: string,
+  entities: PIIEntity[],
+  context?: PlaceholderContext,
+): MaskResult {
+  const ctx = context || createMaskingContext();
+  const masked = replaceWithPlaceholders(
+    text,
+    entities,
+    ctx,
+    (e) => e.entity_type,
+    generatePlaceholder,
+    resolveConflicts,
+  );
+  return { masked, context: ctx };
+}
+
+/**
+ * Unmasks text by passing it and the context to restorePlaceholders
+ *
+ * Forwards the getFormatValue(config) formatter, which is undefined unless show_markers is set
+ */
+export function unmask(text: string, context: PlaceholderContext, config: MaskingConfig): string {
+  return restorePlaceholders(text, context, getFormatValue(config));
+}
+
+/**
+ * Masks messages using the per-part entities in detection.messageEntities
+ *
+ * One context is created here, passed to transformMessagesPerPart, and returned with the result.
+ */
+export function maskMessages(
+  messages: ChatMessage[],
+  detection: PIIDetectionResult,
+): { masked: ChatMessage[]; context: PlaceholderContext } {
+  const context = createMaskingContext();
+
+  const masked = transformMessagesPerPart(
+    messages,
+    detection.messageEntities,
+    (text, entities, ctx) => mask(text, entities, ctx).masked,
+    context,
+  );
+
+  return { masked, context };
+}
+
+/**
+ * Streaming unmask helper - delegates to processStreamChunk with unmask() as the text callback
+ *
+ * Returns output plus remainingBuffer, which might hold a partial placeholder for the next call
+ */
+export function unmaskStreamChunk(
+  buffer: string,
+  newChunk: string,
+  context: PlaceholderContext,
+  config: MaskingConfig,
+): { output: string; remainingBuffer: string } {
+  return processStreamChunk(buffer, newChunk, context, (text, ctx) => unmask(text, ctx, config));
+}
+
+/**
+ * Flushes remaining buffer at end of stream via flushBuffer, using unmask() as the text callback
+ */
+export function flushMaskingBuffer(
+  buffer: string,
+  context: PlaceholderContext,
+  config: MaskingConfig,
+): string {
+  return flushBuffer(buffer, context, (text, ctx) => unmask(text, ctx, config));
+}
+
+/**
+ * Unmasks a chat completion response via restoreResponsePlaceholders and the config's formatter
+ */
+export function unmaskResponse(
+  response: ChatCompletionResponse,
+  context: PlaceholderContext,
+  config: MaskingConfig,
+): ChatCompletionResponse {
+  return restoreResponsePlaceholders(response, context, getFormatValue(config));
+}
diff --git a/src/routes/info.ts b/src/routes/info.ts

index 7b3ab3ddaa320657fce39059a461f34d96095027..76d525c7af96f34f35541f4da0eed521ca0cbdeb 100644 (file)
--- a/src/routes/info.ts
+++ b/src/routes/info.ts
@@ -1,8 +1,8 @@
  import { Hono } from "hono";
  import pkg from "../../package.json";
  import { getConfig } from "../config";
+import { getPIIDetector } from "../pii/detect";
  import { getRouter } from "../services/decision";
-import { getPIIDetector } from "../services/pii-detector";
  
  export const infoRoutes = new Hono();
  
diff --git a/src/routes/proxy.ts b/src/routes/proxy.ts

index e275d21eb117c00dc4398760cc2a5dd179209b7b..99b91b5f3cbd88d53c2f65a9f49c101bb346056e 100644 (file)
--- a/src/routes/proxy.ts
+++ b/src/routes/proxy.ts
@@ -4,12 +4,9 @@ import { Hono } from "hono";
  import { proxy } from "hono/proxy";
  import { z } from "zod";
  import { getConfig, type MaskingConfig } from "../config";
-import {
-  detectSecrets,
-  extractTextFromRequest,
-  type SecretsDetectionResult,
-} from "../secrets/detect";
-import { type RedactionContext, redactSecrets, unredactResponse } from "../secrets/redact";
+import { unmaskResponse as unmaskPIIResponse } from "../pii/mask";
+import { detectSecretsInMessages, type MessageSecretsResult } from "../secrets/detect";
+import { maskMessages as maskSecretsMessages, unmaskSecretsResponse } from "../secrets/mask";
  import { getRouter, type MaskDecision, type RoutingDecision } from "../services/decision";
  import {
    type ChatCompletionRequest,
@@ -19,9 +16,9 @@ import {
    type LLMResult,
  } from "../services/llm-client";
  import { logRequest, type RequestLogData } from "../services/logger";
-import { unmaskResponse } from "../services/masking";
  import { createUnmaskingStream } from "../services/stream-transformer";
-import { type ContentPart, extractTextContent } from "../utils/content";
+import { extractTextContent } from "../utils/content";
+import type { PlaceholderContext } from "../utils/message-transform";
  
  // Request validation schema
  const ChatCompletionSchema = z
@@ -57,7 +54,7 @@ function createErrorLogData(
    statusCode: number,
    errorMessage: string,
    decision?: RoutingDecision,
-  secretsResult?: SecretsDetectionResult,
+  secretsResult?: MessageSecretsResult,
    maskedContent?: string,
  ): RequestLogData {
    const config = getConfig();
@@ -68,7 +65,7 @@ function createErrorLogData(
      model: body.model || "unknown",
      piiDetected: decision?.piiResult.hasPII ?? false,
      entities: decision
-      ? [...new Set(decision.piiResult.newEntities.map((e) => e.entity_type))]
+      ? [...new Set(decision.piiResult.allEntities.map((e) => e.entity_type))]
        : [],
      latencyMs: Date.now() - startTime,
      scanTimeMs: decision?.piiResult.scanTimeMs ?? 0,
@@ -110,14 +107,13 @@ proxyRoutes.post(
      const router = getRouter();
  
      // Track secrets detection state for response handling
-    let secretsResult: SecretsDetectionResult | undefined;
-    let redactionContext: RedactionContext | undefined;
-    let secretsRedacted = false;
+    let secretsResult: MessageSecretsResult | undefined;
+    let secretsMaskingContext: PlaceholderContext | undefined;
+    let secretsMasked = false;
  
-    // Secrets detection runs before PII detection
+    // Secrets detection runs before PII detection (per-part)
      if (config.secrets_detection.enabled) {
-      const text = extractTextFromRequest(body);
-      secretsResult = detectSecrets(text, config.secrets_detection);
+      secretsResult = detectSecretsInMessages(body.messages, config.secrets_detection);
  
        if (secretsResult.detected) {
          const secretTypes = secretsResult.matches.map((m) => m.type);
@@ -125,16 +121,14 @@ proxyRoutes.post(
  
          // Block action - return 400 error
          if (config.secrets_detection.action === "block") {
-          // Set headers before returning error
            c.header("X-PasteGuard-Secrets-Detected", "true");
            c.header("X-PasteGuard-Secrets-Types", secretTypesStr);
  
-          // Log metadata only (no secret content)
            logRequest(
              {
                timestamp: new Date().toISOString(),
                mode: config.mode,
-              provider: "openai", // Note: Request never reached provider
+              provider: "openai",
                model: body.model || "unknown",
                piiDetected: false,
                entities: [],
@@ -161,12 +155,12 @@ proxyRoutes.post(
            );
          }
  
-        // Redact action - replace secrets with placeholders and continue
-        if (config.secrets_detection.action === "redact") {
-          const redactedMessages = redactMessagesWithSecrets(body.messages, secretsResult);
-          body = { ...body, messages: redactedMessages.messages };
-          redactionContext = redactedMessages.context;
-          secretsRedacted = true;
+        // Mask action - replace secrets with placeholders (per-part)
+        if (config.secrets_detection.action === "mask") {
+          const result = maskSecretsMessages(body.messages, secretsResult);
+          body = { ...body, messages: result.masked };
+          secretsMaskingContext = result.context;
+          secretsMasked = true;
          }
  
          // route_local action is handled in handleCompletion via secretsResult
@@ -204,134 +198,12 @@ proxyRoutes.post(
        startTime,
        router,
        secretsResult,
-      redactionContext,
-      secretsRedacted,
+      secretsMaskingContext,
+      secretsMasked,
      );
    },
  );
  
-/**
- * Redacts secrets in all messages based on detection result
- * Returns redacted messages and the redaction context for unredaction
- */
-function redactMessagesWithSecrets(
-  messages: ChatMessage[],
-  secretsResult: SecretsDetectionResult,
-): { messages: ChatMessage[]; context: RedactionContext } {
-  // Build a map of message content to redactions
-  // Since we concatenated all messages with \n, we need to track positions per message
-  let currentOffset = 0;
-  const messagePositions: { start: number; end: number }[] = [];
-
-  for (const msg of messages) {
-    const text = extractTextContent(msg.content);
-    const length = text.length;
-    messagePositions.push({ start: currentOffset, end: currentOffset + length });
-    currentOffset += length + 1; // +1 for \n separator
-  }
-
-  // Create redaction context
-  let context: RedactionContext = {
-    mapping: {},
-    reverseMapping: {},
-    counters: {},
-  };
-
-  // Apply redactions to each message
-  const redactedMessages = messages.map((msg, i) => {
-    // Handle null/undefined content
-    if (!msg.content) {
-      return msg;
-    }
-
-    // Handle array content (multimodal messages)
-    if (Array.isArray(msg.content)) {
-      const msgPos = messagePositions[i];
-
-      // Filter redactions for this message
-      const messageRedactions = (secretsResult.redactions || [])
-        .filter((r) => r.start >= msgPos.start && r.end <= msgPos.end)
-        .map((r) => ({
-          ...r,
-          start: r.start - msgPos.start,
-          end: r.end - msgPos.start,
-        }));
-
-      if (messageRedactions.length === 0) {
-        return msg;
-      }
-
-      // Track offset position within the concatenated text for this message
-      // (matches how extractTextContent joins parts with \n)
-      let partOffset = 0;
-
-      // Redact only text parts of array content with proper offset tracking
-      const redactedContent = msg.content.map((part: ContentPart) => {
-        if (part.type === "text" && typeof part.text === "string") {
-          const partLength = part.text.length;
-
-          // Find redactions that apply to this specific part
-          const partRedactions = messageRedactions
-            .filter((r) => r.start < partOffset + partLength && r.end > partOffset)
-            .map((r) => ({
-              ...r,
-              start: Math.max(0, r.start - partOffset),
-              end: Math.min(partLength, r.end - partOffset),
-            }));
-
-          if (partRedactions.length > 0) {
-            const { redacted, context: updatedContext } = redactSecrets(
-              part.text,
-              partRedactions,
-              context,
-            );
-            context = updatedContext;
-            partOffset += partLength + 1; // +1 for \n separator
-            return { ...part, text: redacted };
-          }
-
-          partOffset += partLength + 1; // +1 for \n separator
-          return part;
-        }
-        return part;
-      });
-
-      return { ...msg, content: redactedContent };
-    }
-
-    // Handle string content (text-only messages)
-    if (typeof msg.content !== "string") {
-      return msg;
-    }
-
-    const msgPos = messagePositions[i];
-
-    // Filter redactions that fall within this message's position
-    const messageRedactions = (secretsResult.redactions || [])
-      .filter((r) => r.start >= msgPos.start && r.end <= msgPos.end)
-      .map((r) => ({
-        ...r,
-        start: r.start - msgPos.start,
-        end: r.end - msgPos.start,
-      }));
-
-    if (messageRedactions.length === 0) {
-      return msg;
-    }
-
-    const { redacted, context: updatedContext } = redactSecrets(
-      msg.content,
-      messageRedactions,
-      context,
-    );
-    context = updatedContext;
-
-    return { ...msg, content: redacted };
-  });
-
-  return { messages: redactedMessages, context };
-}
-
  /**
   * Handle chat completion for both route and mask modes
   */
@@ -341,9 +213,9 @@ async function handleCompletion(
    decision: RoutingDecision,
    startTime: number,
    router: ReturnType<typeof getRouter>,
-  secretsResult?: SecretsDetectionResult,
-  redactionContext?: RedactionContext,
-  secretsRedacted?: boolean,
+  secretsResult?: MessageSecretsResult,
+  secretsMaskingContext?: PlaceholderContext,
+  secretsMasked?: boolean,
  ) {
    const client = router.getClient(decision.provider);
    const maskingConfig = router.getMaskingConfig();
@@ -377,8 +249,8 @@ async function handleCompletion(
      c.header("X-PasteGuard-Secrets-Detected", "true");
      c.header("X-PasteGuard-Secrets-Types", secretsTypes.join(","));
    }
-  if (secretsRedacted) {
-    c.header("X-PasteGuard-Secrets-Redacted", "true");
+  if (secretsMasked) {
+    c.header("X-PasteGuard-Secrets-Masked", "true");
    }
  
    try {
@@ -394,7 +266,7 @@ async function handleCompletion(
          maskingConfig,
          secretsDetected,
          secretsTypes,
-        redactionContext,
+        secretsMaskingContext,
        );
      }
  
@@ -407,7 +279,7 @@ async function handleCompletion(
        maskingConfig,
        secretsDetected,
        secretsTypes,
-      redactionContext,
+      secretsMaskingContext,
      );
    } catch (error) {
      console.error("LLM request error:", error);
@@ -476,7 +348,7 @@ function handleStreamingResponse(
    maskingConfig: MaskingConfig,
    secretsDetected?: boolean,
    secretsTypes?: string[],
-  redactionContext?: RedactionContext,
+  secretsMaskingContext?: PlaceholderContext,
  ) {
    logRequest(
      createLogData(
@@ -497,14 +369,14 @@ function handleStreamingResponse(
  
    // Determine if we need to transform the stream
    const needsPIIUnmasking = isMaskDecision(decision);
-  const needsSecretsUnredaction = redactionContext !== undefined;
+  const needsSecretsUnmasking = secretsMaskingContext !== undefined;
  
-  if (needsPIIUnmasking || needsSecretsUnredaction) {
+  if (needsPIIUnmasking || needsSecretsUnmasking) {
      const unmaskingStream = createUnmaskingStream(
        result.response,
        needsPIIUnmasking ? decision.maskingContext : undefined,
        maskingConfig,
-      redactionContext,
+      secretsMaskingContext,
      );
      return c.body(unmaskingStream);
    }
@@ -524,7 +396,7 @@ function handleJsonResponse(
    maskingConfig: MaskingConfig,
    secretsDetected?: boolean,
    secretsTypes?: string[],
-  redactionContext?: RedactionContext,
+  secretsMaskingContext?: PlaceholderContext,
  ) {
    logRequest(
      createLogData(
@@ -543,12 +415,12 @@ function handleJsonResponse(
  
    // First unmask PII if needed
    if (isMaskDecision(decision)) {
-    response = unmaskResponse(response, decision.maskingContext, maskingConfig);
+    response = unmaskPIIResponse(response, decision.maskingContext, maskingConfig);
    }
  
-  // Then unredact secrets if needed
-  if (redactionContext) {
-    response = unredactResponse(response, redactionContext);
+  // Then unmask secrets if needed
+  if (secretsMaskingContext) {
+    response = unmaskSecretsResponse(response, secretsMaskingContext);
    }
  
    return c.json(response);
@@ -572,7 +444,7 @@ function createLogData(
      provider: decision.provider,
      model: result.model,
      piiDetected: decision.piiResult.hasPII,
-    entities: [...new Set(decision.piiResult.newEntities.map((e) => e.entity_type))],
+    entities: [...new Set(decision.piiResult.allEntities.map((e) => e.entity_type))],
      latencyMs: Date.now() - startTime,
      scanTimeMs: decision.piiResult.scanTimeMs,
      promptTokens: response?.usage?.prompt_tokens,
diff --git a/src/secrets/detect.test.ts b/src/secrets/detect.test.ts

index c73506c2c99c7a98afc222237fecdc3e8fd915e6..0337d83853759036e7f734a88780b417305d4aef 100644 (file)
--- a/src/secrets/detect.test.ts
+++ b/src/secrets/detect.test.ts
@@ -1,7 +1,6 @@
  import { describe, expect, test } from "bun:test";
  import type { SecretsDetectionConfig } from "../config";
-import type { ChatCompletionRequest } from "../services/llm-client";
-import { detectSecrets, extractTextFromRequest } from "./detect";
+import { detectSecrets } from "./detect";
  
  const defaultConfig: SecretsDetectionConfig = {
    enabled: true,
@@ -50,8 +49,8 @@ describe("detectSecrets", () => {
      expect(result.matches).toHaveLength(1);
      expect(result.matches[0].type).toBe("OPENSSH_PRIVATE_KEY");
      expect(result.matches[0].count).toBe(1);
-    expect(result.redactions).toBeDefined();
-    expect(result.redactions?.length).toBe(1);
+    expect(result.locations).toBeDefined();
+    expect(result.locations?.length).toBe(1);
    });
  
    test("detects RSA private key", () => {
@@ -85,7 +84,7 @@ describe("detectSecrets", () => {
      expect(result.matches).toHaveLength(1);
      expect(result.matches[0].type).toBe("OPENSSH_PRIVATE_KEY");
      expect(result.matches[0].count).toBe(2);
-    expect(result.redactions?.length).toBe(2);
+    expect(result.locations?.length).toBe(2);
    });
  
    test("detects multiple secrets of different types", () => {
@@ -169,13 +168,13 @@ describe("detectSecrets", () => {
      expect(result.matches[0].count).toBe(1); // Should be 1, not 2
    });
  
-  test("redactions are sorted by start position descending", () => {
+  test("locations are sorted by start position descending", () => {
      const text = `${opensshKey}\n\n${rsaKey}`;
      const result = detectSecrets(text, defaultConfig);
-    expect(result.redactions).toBeDefined();
-    if (result.redactions && result.redactions.length > 1) {
-      for (let i = 0; i < result.redactions.length - 1; i++) {
-        expect(result.redactions[i].start).toBeGreaterThan(result.redactions[i + 1].start);
+    expect(result.locations).toBeDefined();
+    if (result.locations && result.locations.length > 1) {
+      for (let i = 0; i < result.locations.length - 1; i++) {
+        expect(result.locations[i].start).toBeGreaterThan(result.locations[i + 1].start);
        }
      }
    });
@@ -203,8 +202,8 @@ describe("detectSecrets - API Keys", () => {
      expect(result.matches).toHaveLength(1);
      expect(result.matches[0].type).toBe("API_KEY_OPENAI");
      expect(result.matches[0].count).toBe(1);
-    expect(result.redactions).toBeDefined();
-    expect(result.redactions?.[0].type).toBe("API_KEY_OPENAI");
+    expect(result.locations).toBeDefined();
+    expect(result.locations?.[0].type).toBe("API_KEY_OPENAI");
    });
  
    test("detects AWS access key", () => {
@@ -408,13 +407,13 @@ ADMIN_PWD=adminpass123`;
      expect(result.detected).toBe(false);
    });
  
-  test("redaction positions are correct", () => {
+  test("location positions are correct", () => {
      const text = "config: DB_PASSWORD=mysecretpassword123 here";
      const result = detectSecrets(text, passwordConfig);
-    expect(result.redactions).toBeDefined();
-    expect(result.redactions?.length).toBe(1);
-    const redacted = text.slice(result.redactions![0].start, result.redactions![0].end);
-    expect(redacted).toBe("DB_PASSWORD=mysecretpassword123");
+    expect(result.locations).toBeDefined();
+    expect(result.locations?.length).toBe(1);
+    const matched = text.slice(result.locations![0].start, result.locations![0].end);
+    expect(matched).toBe("DB_PASSWORD=mysecretpassword123");
    });
  });
  
@@ -481,13 +480,13 @@ SESSION_SECRET=session_key_here`;
      expect(result.detected).toBe(false);
    });
  
-  test("redaction positions are correct", () => {
+  test("location positions are correct", () => {
      const text = "export APP_SECRET=mysupersecretvalue123 # comment";
      const result = detectSecrets(text, secretConfig);
-    expect(result.redactions).toBeDefined();
-    expect(result.redactions?.length).toBe(1);
-    const redacted = text.slice(result.redactions![0].start, result.redactions![0].end);
-    expect(redacted).toBe("APP_SECRET=mysupersecretvalue123");
+    expect(result.locations).toBeDefined();
+    expect(result.locations?.length).toBe(1);
+    const matched = text.slice(result.locations![0].start, result.locations![0].end);
+    expect(matched).toBe("APP_SECRET=mysupersecretvalue123");
    });
  });
  
@@ -596,13 +595,13 @@ CACHE=redis://default:pass@redis:6379`;
      expect(result.detected).toBe(false);
    });
  
-  test("redaction covers full connection string", () => {
+  test("location covers full connection string", () => {
      const text = "export DB=postgres://admin:secret123@db.example.com:5432/prod";
      const result = detectSecrets(text, connConfig);
-    expect(result.redactions).toBeDefined();
-    expect(result.redactions?.length).toBe(1);
-    const redacted = text.slice(result.redactions![0].start, result.redactions![0].end);
-    expect(redacted).toBe("postgres://admin:secret123@db.example.com:5432/prod");
+    expect(result.locations).toBeDefined();
+    expect(result.locations?.length).toBe(1);
+    const matched = text.slice(result.locations![0].start, result.locations![0].end);
+    expect(matched).toBe("postgres://admin:secret123@db.example.com:5432/prod");
    });
  });
  
@@ -636,81 +635,16 @@ ${rsaKey}
      expect(result.matches.length).toBeGreaterThanOrEqual(4);
    });
  
-  test("redaction positions are correct for all types", () => {
+  test("location positions are correct for all types", () => {
      const text = `Key: ${awsAccessKey} and ${githubToken}`;
      const result = detectSecrets(text, allConfig);
-    expect(result.redactions).toBeDefined();
-    expect(result.redactions?.length).toBe(2);
+    expect(result.locations).toBeDefined();
+    expect(result.locations?.length).toBe(2);
  
-    // Verify redactions point to correct positions
-    for (const redaction of result.redactions || []) {
-      const extracted = text.slice(redaction.start, redaction.end);
+    // Verify locations point to correct positions
+    for (const location of result.locations || []) {
+      const extracted = text.slice(location.start, location.end);
        expect(extracted.length).toBeGreaterThan(10);
      }
    });
  });
-
-describe("extractTextFromRequest", () => {
-  test("extracts text from simple messages", () => {
-    const request: ChatCompletionRequest = {
-      messages: [
-        { role: "user", content: "Hello world" },
-        { role: "assistant", content: "Hi there" },
-      ],
-    };
-    const text = extractTextFromRequest(request);
-    expect(text).toBe("Hello world\nHi there");
-  });
-
-  test("extracts text from system messages", () => {
-    const request: ChatCompletionRequest = {
-      messages: [
-        { role: "system", content: "You are helpful" },
-        { role: "user", content: "Hello" },
-      ],
-    };
-    const text = extractTextFromRequest(request);
-    expect(text).toBe("You are helpful\nHello");
-  });
-
-  test("filters out empty messages", () => {
-    const request: ChatCompletionRequest = {
-      messages: [
-        { role: "user", content: "Hello" },
-        { role: "assistant", content: "" },
-        { role: "user", content: "World" },
-      ],
-    };
-    const text = extractTextFromRequest(request);
-    expect(text).toBe("Hello\nWorld");
-  });
-
-  test("handles single message", () => {
-    const request: ChatCompletionRequest = {
-      messages: [{ role: "user", content: "Test" }],
-    };
-    const text = extractTextFromRequest(request);
-    expect(text).toBe("Test");
-  });
-
-  test("handles empty messages array", () => {
-    const request: ChatCompletionRequest = {
-      messages: [],
-    };
-    const text = extractTextFromRequest(request);
-    expect(text).toBe("");
-  });
-
-  test("extracts all message content in order", () => {
-    const request: ChatCompletionRequest = {
-      messages: [
-        { role: "system", content: "System" },
-        { role: "user", content: "User1" },
-        { role: "assistant", content: "Assistant" },
-        { role: "user", content: "User2" },
-      ],
-    };
-    const text = extractTextFromRequest(request);
-    expect(text).toBe("System\nUser1\nAssistant\nUser2");
-  });
-});
diff --git a/src/secrets/detect.ts b/src/secrets/detect.ts

index 241a84665df0bd114d8cf43bfb564f6a3b0a3771..36e5edd94b951a0349bedaa593b1e3cd6f955ba4 100644 (file)
--- a/src/secrets/detect.ts
+++ b/src/secrets/detect.ts
@@ -1,32 +1,22 @@
  import type { SecretsDetectionConfig } from "../config";
-import type { ChatCompletionRequest } from "../services/llm-client";
-import { extractTextContent } from "../utils/content";
+import type { ChatMessage } from "../services/llm-client";
+import type { ContentPart } from "../utils/content";
  import { patternDetectors } from "./patterns";
-import type { SecretsDetectionResult, SecretsMatch, SecretsRedaction } from "./patterns/types";
+import type {
+  MessageSecretsResult,
+  SecretLocation,
+  SecretsDetectionResult,
+  SecretsMatch,
+} from "./patterns/types";
  
-// Re-export types from patterns module for backwards compatibility
  export type {
+  MessageSecretsResult,
    SecretEntityType,
+  SecretLocation,
    SecretsDetectionResult,
    SecretsMatch,
-  SecretsRedaction,
  } from "./patterns/types";
  
-/**
- * Extracts all text content from an OpenAI chat completion request
- *
- * Concatenates content from all messages (system, user, assistant) for secrets scanning.
- * Handles both string content (text-only) and array content (multimodal messages).
- *
- * Returns concatenated text for secrets scanning.
- */
-export function extractTextFromRequest(body: ChatCompletionRequest): string {
-  return body.messages
-    .map((message) => extractTextContent(message.content))
-    .filter((text) => text.length > 0)
-    .join("\n");
-}
-
  /**
   * Detects secret material (e.g. private keys, API keys, tokens) in text
   *
@@ -54,7 +44,7 @@ export function detectSecrets(
  
    // Aggregate results from all pattern detectors
    const allMatches: SecretsMatch[] = [];
-  const allRedactions: SecretsRedaction[] = [];
+  const allLocations: SecretLocation[] = [];
  
    for (const detector of patternDetectors) {
      // Skip detectors that don't handle any enabled types
@@ -63,17 +53,80 @@ export function detectSecrets(
  
      const result = detector.detect(textToScan, enabledTypes);
      allMatches.push(...result.matches);
-    if (result.redactions) {
-      allRedactions.push(...result.redactions);
+    if (result.locations) {
+      allLocations.push(...result.locations);
      }
    }
  
-  // Sort redactions by start position (descending) for safe replacement
-  allRedactions.sort((a, b) => b.start - a.start);
+  // Sort locations by start position (descending) for safe replacement
+  allLocations.sort((a, b) => b.start - a.start);
  
    return {
      detected: allMatches.length > 0,
      matches: allMatches,
-    redactions: allRedactions.length > 0 ? allRedactions : undefined,
+    locations: allLocations.length > 0 ? allLocations : undefined,
+  };
+}
+
+/**
+ * Detects secrets in chat messages with per-part granularity
+ *
+ * messageLocations[msgIdx][partIdx] lists the locations found in that part: string content is a
+ * single part at index 0; array (multimodal) content gets one entry per part, empty for non-text
+ * parts. Scanning each part on its own avoids complex offset mapping when applying masks.
+ */
+export function detectSecretsInMessages(
+  messages: ChatMessage[],
+  config: SecretsDetectionConfig,
+): MessageSecretsResult {
+  if (!config.enabled) {
+    return {
+      detected: false,
+      matches: [],
+      messageLocations: messages.map(() => []),
+    };
+  }
+
+  const matchCounts = new Map<string, number>();
+
+  const messageLocations: SecretLocation[][][] = messages.map((message) => {
+    // String content → single part at index 0; counts are summed per secret type
+    if (typeof message.content === "string") {
+      const result = detectSecrets(message.content, config);
+      for (const match of result.matches) {
+        matchCounts.set(match.type, (matchCounts.get(match.type) || 0) + match.count);
+      }
+      return [result.locations || []];
+    }
+
+    // Array content (multimodal) → one array per part; non-text parts yield []
+    if (Array.isArray(message.content)) {
+      return message.content.map((part: ContentPart) => {
+        if (part.type !== "text" || typeof part.text !== "string") {
+          return [];
+        }
+        const result = detectSecrets(part.text, config);
+        for (const match of result.matches) {
+          matchCounts.set(match.type, (matchCounts.get(match.type) || 0) + match.count);
+        }
+        return result.locations || [];
+      });
+    }
+
+    // Null/undefined content → no parts, so an empty list for this message
+    return [];
+  });
+
+  const allMatches: SecretsMatch[] = [];
+  for (const [type, count] of matchCounts) {
+    allMatches.push({ type: type as SecretLocation["type"], count });
+  }
+
+  const hasLocations = messageLocations.some((msg) => msg.some((part) => part.length > 0));
+
+  return {
+    detected: hasLocations,
+    matches: allMatches,
+    messageLocations,
    };
  }
diff --git a/src/secrets/mask.test.ts b/src/secrets/mask.test.ts

new file mode 100644 (file)

index 0000000..e58cb46
--- /dev/null
+++ b/src/secrets/mask.test.ts
@@ -0,0 +1,272 @@
+import { describe, expect, test } from "bun:test";
+import { createSecretsResult } from "../test-utils/detection-results";
+import type { SecretLocation } from "./detect";
+import {
+  createSecretsMaskingContext,
+  flushSecretsMaskingBuffer,
+  maskMessages,
+  maskSecrets,
+  unmaskSecrets,
+  unmaskSecretsResponse,
+  unmaskSecretsStreamChunk,
+} from "./mask";
+
+const sampleSecret = "sk-proj-abc123def456ghi789jkl012mno345pqr678stu901vwx";
+
+describe("secrets placeholder format", () => {
+  // Location of the first occurrence of `secret` inside `text`.
+  const locate = (
+    text: string,
+    secret: string,
+    type: SecretLocation["type"],
+  ): SecretLocation => {
+    const start = text.indexOf(secret);
+    return { start, end: start + secret.length, type };
+  };
+
+  test("uses [[SECRET_MASKED_TYPE_N]] format", () => {
+    const text = `My API key is ${sampleSecret}`;
+
+    const { masked } = maskSecrets(text, [locate(text, sampleSecret, "API_KEY_OPENAI")]);
+
+    expect(masked).toBe("My API key is [[SECRET_MASKED_API_KEY_OPENAI_1]]");
+  });
+
+  test("increments counter per secret type", () => {
+    const anotherSecret = "sk-proj-xyz789abc123def456ghi789jkl012mno345pqr678";
+    const text = `Key1: ${sampleSecret} Key2: ${anotherSecret}`;
+
+    // Two distinct values of the same type must receive consecutive counters.
+    const { masked } = maskSecrets(text, [
+      locate(text, sampleSecret, "API_KEY_OPENAI"),
+      locate(text, anotherSecret, "API_KEY_OPENAI"),
+    ]);
+
+    expect(masked).toContain("[[SECRET_MASKED_API_KEY_OPENAI_1]]");
+    expect(masked).toContain("[[SECRET_MASKED_API_KEY_OPENAI_2]]");
+  });
+
+  test("tracks different secret types separately", () => {
+    const awsKey = "AKIAIOSFODNN7EXAMPLE";
+    const text = `OpenAI: ${sampleSecret} AWS: ${awsKey}`;
+
+    // Each type keeps its own counter, so both placeholders end in _1.
+    const { masked } = maskSecrets(text, [
+      locate(text, sampleSecret, "API_KEY_OPENAI"),
+      locate(text, awsKey, "API_KEY_AWS"),
+    ]);
+
+    expect(masked).toContain("[[SECRET_MASKED_API_KEY_OPENAI_1]]");
+    expect(masked).toContain("[[SECRET_MASKED_API_KEY_AWS_1]]");
+  });
+});
+
+describe("maskMessages with MessageSecretsResult", () => {
+  // Location of `sampleSecret` when it directly follows `prefix` at the start of a part.
+  const sampleSecretAfter = (prefix: string): SecretLocation => ({
+    start: prefix.length,
+    end: prefix.length + sampleSecret.length,
+    type: "API_KEY_OPENAI",
+  });
+
+  test("masks secrets in multiple messages", () => {
+    const prefix = "My key is ";
+    const messages = [
+      { role: "user" as const, content: `${prefix}${sampleSecret}` },
+      { role: "assistant" as const, content: "I'll help you with that." },
+    ];
+    // Message 0 has one part with one secret; message 1 has one clean part.
+    const detection = createSecretsResult([[[sampleSecretAfter(prefix)]], [[]]]);
+
+    const { masked, context } = maskMessages(messages, detection);
+
+    expect(masked[0].content).toContain("[[SECRET_MASKED_API_KEY_OPENAI_1]]");
+    expect(masked[0].content).not.toContain(sampleSecret);
+    expect(masked[1].content).toBe("I'll help you with that.");
+    expect(Object.keys(context.mapping)).toHaveLength(1);
+  });
+
+  test("shares context across messages - same secret gets same placeholder", () => {
+    const firstPrefix = "Key1: ";
+    const secondPrefix = "Key2: ";
+    const messages = [
+      { role: "user" as const, content: `${firstPrefix}${sampleSecret}` },
+      { role: "user" as const, content: `${secondPrefix}${sampleSecret}` },
+    ];
+    const detection = createSecretsResult([
+      [[sampleSecretAfter(firstPrefix)]],
+      [[sampleSecretAfter(secondPrefix)]],
+    ]);
+
+    const { masked, context } = maskMessages(messages, detection);
+
+    expect(masked[0].content).toBe("Key1: [[SECRET_MASKED_API_KEY_OPENAI_1]]");
+    expect(masked[1].content).toBe("Key2: [[SECRET_MASKED_API_KEY_OPENAI_1]]");
+    expect(Object.keys(context.mapping)).toHaveLength(1);
+  });
+
+  test("handles multimodal array content", () => {
+    const prefix = "Key: ";
+    const messages = [
+      {
+        role: "user" as const,
+        content: [
+          { type: "text", text: `${prefix}${sampleSecret}` },
+          { type: "image_url", image_url: { url: "https://example.com/img.jpg" } },
+        ],
+      },
+    ];
+    // One message, two parts: the text part has a secret, the image part has none.
+    const detection = createSecretsResult([[[sampleSecretAfter(prefix)], []]]);
+
+    const { masked } = maskMessages(messages, detection);
+
+    const parts = masked[0].content as Array<{ type: string; text?: string }>;
+    expect(parts[0].text).toBe("Key: [[SECRET_MASKED_API_KEY_OPENAI_1]]");
+    expect(parts[1].type).toBe("image_url");
+  });
+});
+
+describe("streaming with secrets placeholders", () => {
+  // Fresh context that already knows how to resolve the first OpenAI placeholder.
+  const contextWithSampleSecret = () => {
+    const context = createSecretsMaskingContext();
+    context.mapping["[[SECRET_MASKED_API_KEY_OPENAI_1]]"] = sampleSecret;
+    return context;
+  };
+
+  test("buffers partial [[SECRET_MASKED placeholder", () => {
+    const chunk = unmaskSecretsStreamChunk("", "Key: [[SECRET_MAS", contextWithSampleSecret());
+
+    // Text before the unfinished placeholder is emitted; the fragment is held back.
+    expect(chunk.output).toBe("Key: ");
+    expect(chunk.remainingBuffer).toBe("[[SECRET_MAS");
+  });
+
+  test("completes buffered placeholder across chunks", () => {
+    const chunk = unmaskSecretsStreamChunk(
+      "[[SECRET_MAS",
+      "KED_API_KEY_OPENAI_1]] done",
+      contextWithSampleSecret(),
+    );
+
+    expect(chunk.output).toBe(`${sampleSecret} done`);
+    expect(chunk.remainingBuffer).toBe("");
+  });
+
+  test("flushes incomplete buffer as-is", () => {
+    // Context has no mappings here; the dangling fragment must come back untouched.
+    const emptyContext = createSecretsMaskingContext();
+    const flushed = flushSecretsMaskingBuffer("[[SECRET_MAS", emptyContext);
+    expect(flushed).toBe("[[SECRET_MAS");
+  });
+});
+
+describe("mask -> unmask roundtrip", () => {
+  test("preserves original data through roundtrip", () => {
+    // Multi-line text with a leading and a trailing newline around the three lines.
+    const originalText = [
+      "",
+      "Here are my credentials:",
+      `OpenAI API Key: ${sampleSecret}`,
+      "Please store them securely.",
+      "",
+    ].join("\n");
+    const secretStart = originalText.indexOf(sampleSecret);
+    const locations: SecretLocation[] = [
+      { start: secretStart, end: secretStart + sampleSecret.length, type: "API_KEY_OPENAI" },
+    ];
+
+    const maskResult = maskSecrets(originalText, locations);
+
+    expect(maskResult.masked).not.toContain(sampleSecret);
+    expect(maskResult.masked).toContain("[[SECRET_MASKED_API_KEY_OPENAI_1]]");
+
+    // Unmasking with the same context must give back the exact input.
+    expect(unmaskSecrets(maskResult.masked, maskResult.context)).toBe(originalText);
+  });
+});
+
+describe("unmaskSecretsResponse", () => {
+  // The title promises "all choices", so the fixture carries two choices that both
+  // reference the placeholder; a single-choice fixture could not catch an
+  // implementation that only rewrites choices[0].
+  test("unmasks all choices in response", () => {
+    const context = createSecretsMaskingContext();
+    context.mapping["[[SECRET_MASKED_API_KEY_OPENAI_1]]"] = sampleSecret;
+
+    const response = {
+      id: "test",
+      object: "chat.completion" as const,
+      created: Date.now(),
+      model: "gpt-4",
+      choices: [
+        {
+          index: 0,
+          message: {
+            role: "assistant" as const,
+            content: "Your key is [[SECRET_MASKED_API_KEY_OPENAI_1]]",
+          },
+          finish_reason: "stop" as const,
+        },
+        {
+          index: 1,
+          message: {
+            role: "assistant" as const,
+            content: "Again: [[SECRET_MASKED_API_KEY_OPENAI_1]]",
+          },
+          finish_reason: "stop" as const,
+        },
+      ],
+    };
+
+    const result = unmaskSecretsResponse(response, context);
+    expect(result.choices).toHaveLength(2);
+    expect(result.choices[0].message.content).toBe(`Your key is ${sampleSecret}`);
+    expect(result.choices[1].message.content).toBe(`Again: ${sampleSecret}`);
+  });
+
+  test("preserves response structure", () => {
+    // Empty context: nothing can be substituted, so every field must pass through.
+    const context = createSecretsMaskingContext();
+    const response = {
+      id: "test-id",
+      object: "chat.completion" as const,
+      created: 12345,
+      model: "gpt-4-turbo",
+      choices: [
+        {
+          index: 0,
+          message: { role: "assistant" as const, content: "Hello" },
+          finish_reason: "stop" as const,
+        },
+      ],
+      usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
+    };
+
+    const result = unmaskSecretsResponse(response, context);
+    expect(result.id).toBe("test-id");
+    expect(result.model).toBe("gpt-4-turbo");
+    expect(result.usage).toEqual({ prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 });
+    // Placeholder-free content must come back unchanged as well.
+    expect(result.choices[0].message.content).toBe("Hello");
+  });
+});
+
+describe("edge cases", () => {
+  // Location of an OpenAI-typed secret of `length` characters starting at `start`.
+  const openAiKeyAt = (start: number, length: number): SecretLocation => ({
+    start,
+    end: start + length,
+    type: "API_KEY_OPENAI",
+  });
+
+  test("returns original text when no locations", () => {
+    const { masked, context } = maskSecrets("Hello world", []);
+
+    expect(masked).toBe("Hello world");
+    expect(Object.keys(context.mapping)).toHaveLength(0);
+  });
+
+  test("reuses placeholder for duplicate secret values", () => {
+    const text = `Key1: ${sampleSecret} Key2: ${sampleSecret}`;
+    // The same value occurs twice: once after "Key1: " and once at the very end.
+    const locations = [
+      openAiKeyAt(text.indexOf(sampleSecret), sampleSecret.length),
+      openAiKeyAt(text.lastIndexOf(sampleSecret), sampleSecret.length),
+    ];
+
+    const { masked, context } = maskSecrets(text, locations);
+
+    expect(masked).toBe(
+      "Key1: [[SECRET_MASKED_API_KEY_OPENAI_1]] Key2: [[SECRET_MASKED_API_KEY_OPENAI_1]]",
+    );
+    expect(Object.keys(context.mapping)).toHaveLength(1);
+  });
+
+  test("preserves context across multiple calls", () => {
+    const context = createSecretsMaskingContext();
+    const anotherSecret = "sk-proj-xyz789abc123def456ghi789jkl012mno345pqr678";
+
+    // First call registers placeholder _1 in the shared context.
+    const firstPrefix = "Key: ";
+    maskSecrets(
+      `${firstPrefix}${sampleSecret}`,
+      [openAiKeyAt(firstPrefix.length, sampleSecret.length)],
+      context,
+    );
+
+    // Second call with a different value must continue at _2.
+    const secondPrefix = "Another: ";
+    const result2 = maskSecrets(
+      `${secondPrefix}${anotherSecret}`,
+      [openAiKeyAt(secondPrefix.length, anotherSecret.length)],
+      context,
+    );
+
+    expect(result2.masked).toBe("Another: [[SECRET_MASKED_API_KEY_OPENAI_2]]");
+    expect(Object.keys(context.mapping)).toHaveLength(2);
+  });
+});
diff --git a/src/secrets/mask.ts b/src/secrets/mask.ts

new file mode 100644 (file)

index 0000000..0c8cf19
--- /dev/null
+++ b/src/secrets/mask.ts
@@ -0,0 +1,115 @@
+import type { ChatCompletionResponse, ChatMessage } from "../services/llm-client";
+import { resolveOverlaps } from "../utils/conflict-resolver";
+import {
+  createPlaceholderContext,
+  flushBuffer,
+  incrementAndGenerate,
+  type MaskResult,
+  type PlaceholderContext,
+  processStreamChunk,
+  replaceWithPlaceholders,
+  restorePlaceholders,
+  restoreResponsePlaceholders,
+  transformMessagesPerPart,
+} from "../utils/message-transform";
+import { generateSecretPlaceholder } from "../utils/placeholders";
+import type { MessageSecretsResult, SecretLocation } from "./detect";
+
+export type { MaskResult } from "../utils/message-transform";
+
+/**
+ * Builds the placeholder context used to mask secrets for a single request.
+ *
+ * @returns A context obtained from createPlaceholderContext
+ */
+export function createSecretsMaskingContext(): PlaceholderContext {
+  const requestContext = createPlaceholderContext();
+  return requestContext;
+}
+
+/**
+ * Produces the placeholder string for one secret of the given type.
+ *
+ * Placeholders look like [[SECRET_MASKED_{TYPE}_{N}]], e.g. [[SECRET_MASKED_API_KEY_OPENAI_1]].
+ * The work is delegated to incrementAndGenerate with generateSecretPlaceholder as formatter.
+ *
+ * @param secretType - Secret type name embedded in the placeholder
+ * @param context - Masking context passed through to incrementAndGenerate
+ */
+function generatePlaceholder(secretType: string, context: PlaceholderContext): string {
+  const placeholder = incrementAndGenerate(secretType, context, generateSecretPlaceholder);
+  return placeholder;
+}
+
+/**
+ * Swaps every located secret in `text` for a placeholder.
+ *
+ * @param text - Text to mask
+ * @param locations - Spans of the secrets inside `text`
+ * @param context - Masking context to reuse; a fresh one is created when omitted
+ * @returns The masked text together with the context that was used
+ */
+export function maskSecrets(
+  text: string,
+  locations: SecretLocation[],
+  context?: PlaceholderContext,
+): MaskResult {
+  const activeContext = context ?? createSecretsMaskingContext();
+
+  return {
+    masked: replaceWithPlaceholders(
+      text,
+      locations,
+      activeContext,
+      (location) => location.type,
+      generatePlaceholder,
+      resolveOverlaps,
+    ),
+    context: activeContext,
+  };
+}
+
+/**
+ * Puts the original secrets back in place of their placeholders.
+ *
+ * @param text - Text that may contain secret placeholders
+ * @param context - Masking context holding the placeholder mappings
+ * @returns The result of restorePlaceholders for `text` and `context`
+ */
+export function unmaskSecrets(text: string, context: PlaceholderContext): string {
+  const restored = restorePlaceholders(text, context);
+  return restored;
+}
+
+/**
+ * Masks secrets across a list of messages, driven by per-part detection results.
+ *
+ * One masking context is created here and handed to transformMessagesPerPart,
+ * which invokes maskSecrets for the text and locations it supplies.
+ *
+ * @param messages - Messages to mask
+ * @param detection - Detection result whose messageLocations drive the masking
+ * @returns The masked messages and the context created for this call
+ */
+export function maskMessages(
+  messages: ChatMessage[],
+  detection: MessageSecretsResult,
+): { masked: ChatMessage[]; context: PlaceholderContext } {
+  const context = createSecretsMaskingContext();
+  const { messageLocations } = detection;
+
+  const masked = transformMessagesPerPart(
+    messages,
+    messageLocations,
+    (text, locations, ctx) => {
+      const result = maskSecrets(text, locations, ctx);
+      return result.masked;
+    },
+    context,
+  );
+
+  return { masked, context };
+}
+
+/**
+ * Streaming counterpart of unmaskSecrets.
+ *
+ * Hands the pending buffer and the new chunk to processStreamChunk, using
+ * unmaskSecrets as the unmasking step.
+ *
+ * @param buffer - Text held back from earlier chunks
+ * @param newChunk - Newly received text
+ * @param context - Masking context holding the placeholder mappings
+ * @returns The text ready to emit plus the buffer that may hold a partial placeholder
+ */
+export function unmaskSecretsStreamChunk(
+  buffer: string,
+  newChunk: string,
+  context: PlaceholderContext,
+): { output: string; remainingBuffer: string } {
+  const chunkResult = processStreamChunk(buffer, newChunk, context, unmaskSecrets);
+  return chunkResult;
+}
+
+/**
+ * Drains whatever is still buffered once the stream has ended.
+ *
+ * @param buffer - Text still held back at end of stream
+ * @param context - Masking context holding the placeholder mappings
+ * @returns The result of flushBuffer, with unmaskSecrets as the unmasking step
+ */
+export function flushSecretsMaskingBuffer(buffer: string, context: PlaceholderContext): string {
+  const flushed = flushBuffer(buffer, context, unmaskSecrets);
+  return flushed;
+}
+
+/**
+ * Restores secrets in a chat completion response, covering all of its choices.
+ *
+ * @param response - Response that may contain secret placeholders
+ * @param context - Masking context holding the placeholder mappings
+ * @returns The response produced by restoreResponsePlaceholders
+ */
+export function unmaskSecretsResponse(
+  response: ChatCompletionResponse,
+  context: PlaceholderContext,
+): ChatCompletionResponse {
+  const unmasked = restoreResponsePlaceholders(response, context);
+  return unmasked;
+}
diff --git a/src/secrets/multimodal.test.ts b/src/secrets/multimodal.test.ts

index b58be8c0a48a684a29e358ad2c3f558533337a52..e5a4a2aa933401d5f9e16557e7c1a875b743176a 100644 (file)
--- a/src/secrets/multimodal.test.ts
+++ b/src/secrets/multimodal.test.ts
@@ -1,11 +1,25 @@
  import { describe, expect, test } from "bun:test";
+import type { PIIDetectionResult, PIIEntity } from "../pii/detect";
+import { maskMessages } from "../pii/mask";
  import type { ChatMessage } from "../services/llm-client";
-import { maskMessages } from "../services/masking";
-import type { PIIEntity } from "../services/pii-detector";
  import type { ContentPart } from "../utils/content";
  
+/**
+ * Test fixture: wraps per-message, per-part entities in a PIIDetectionResult.
+ *
+ * Scan time, language and fallback flag are fixed dummy values.
+ */
+function createPIIResult(messageEntities: PIIEntity[][][]): PIIDetectionResult {
+  // Collapse the message and part levels into one flat entity list.
+  const allEntities = messageEntities.flat(2);
+
+  return {
+    hasPII: allEntities.length > 0,
+    messageEntities,
+    allEntities,
+    scanTimeMs: 0,
+    language: "en",
+    languageFallback: false,
+  };
+}
+
  describe("Multimodal content handling", () => {
-  describe("PII masking with offset tracking", () => {
+  describe("PII masking with per-part entities", () => {
      test("masks PII in multimodal array content", () => {
        const messages: ChatMessage[] = [
          {
@@ -18,16 +32,19 @@ describe("Multimodal content handling", () => {
          },
        ];
  
-      // Concatenated text: "My email is john@example.com and\nmy phone is 555-1234"
-      // Entities for this concatenated text:
-      const entities: PIIEntity[] = [
-        { entity_type: "EMAIL_ADDRESS", start: 12, end: 28, score: 0.9 }, // john@example.com in part 0
-        { entity_type: "PHONE_NUMBER", start: 45, end: 53, score: 0.85 }, // 555-1234 in part 2 (after newline)
-      ];
-
-      const entitiesByMessage = [entities];
+      // Per-part entities: messageEntities[msgIdx][partIdx] = entities
+      const detection = createPIIResult([
+        [
+          // Part 0: email entity (positions relative to part text)
+          [{ entity_type: "EMAIL_ADDRESS", start: 12, end: 28, score: 0.9 }],
+          // Part 1: image, no entities
+          [],
+          // Part 2: phone entity (positions relative to part text)
+          [{ entity_type: "PHONE_NUMBER", start: 12, end: 20, score: 0.85 }],
+        ],
+      ]);
  
-      const { masked } = maskMessages(messages, entitiesByMessage);
+      const { masked } = maskMessages(messages, detection);
  
        // Verify the content is still an array
        expect(Array.isArray(masked[0].content)).toBe(true);
@@ -50,8 +67,6 @@ describe("Multimodal content handling", () => {
      });
  
      test("returns masked array instead of original unmasked array", () => {
-      // This tests the bug fix: previously array content was extracted and masked,
-      // but then the original array was returned unchanged
        const messages: ChatMessage[] = [
          {
            role: "user",
@@ -59,12 +74,17 @@ describe("Multimodal content handling", () => {
          },
        ];
  
-      const entities: PIIEntity[] = [
-        { entity_type: "PERSON", start: 8, end: 13, score: 0.9 }, // Alice
-        { entity_type: "EMAIL_ADDRESS", start: 17, end: 33, score: 0.95 }, // alice@secret.com
-      ];
+      const detection = createPIIResult([
+        [
+          // Part 0 entities
+          [
+            { entity_type: "PERSON", start: 8, end: 13, score: 0.9 },
+            { entity_type: "EMAIL_ADDRESS", start: 17, end: 33, score: 0.95 },
+          ],
+        ],
+      ]);
  
-      const { masked } = maskMessages(messages, [entities]);
+      const { masked } = maskMessages(messages, detection);
  
        // Verify content is still array
        expect(Array.isArray(masked[0].content)).toBe(true);
@@ -78,40 +98,58 @@ describe("Multimodal content handling", () => {
        expect(maskedContent[0].text).toContain("[[EMAIL_ADDRESS_1]]");
      });
  
-    test("handles entities spanning multiple parts with proper offsets", () => {
+    test("handles multiple text parts independently", () => {
        const messages: ChatMessage[] = [
          {
            role: "user",
            content: [
-            { type: "text", text: "First part with email@" },
-            { type: "text", text: "example.com in two parts" },
+            { type: "text", text: "First: john@example.com" },
+            { type: "text", text: "Second: jane@example.com" },
            ],
          },
        ];
  
-      // In concatenated text: "First part with email@\nexample.com in two parts"
-      // Email spans from position 16 to 39 (crossing the newline at position 22)
-      const entities: PIIEntity[] = [
-        { entity_type: "EMAIL_ADDRESS", start: 16, end: 34, score: 0.9 },
-      ];
+      const detection = createPIIResult([
+        [
+          // Part 0 entity
+          [{ entity_type: "EMAIL_ADDRESS", start: 7, end: 23, score: 0.9 }],
+          // Part 1 entity
+          [{ entity_type: "EMAIL_ADDRESS", start: 8, end: 24, score: 0.9 }],
+        ],
+      ]);
  
-      const { masked } = maskMessages(messages, [entities]);
+      const { masked } = maskMessages(messages, detection);
  
        const maskedContent = masked[0].content as ContentPart[];
  
-      // Both parts should be affected by the email entity
-      // Part 0: "First part with [[EMAIL" or similar
-      // Part 1: "ADDRESS_1]] in two parts" or similar
-      // The exact split depends on how the masking handles cross-boundary entities
+      expect(maskedContent[0].text).toBe("First: [[EMAIL_ADDRESS_1]]");
+      expect(maskedContent[1].text).toBe("Second: [[EMAIL_ADDRESS_2]]");
+    });
+
+    test("handles mixed string and array content messages", () => {
+      const messages: ChatMessage[] = [
+        { role: "system", content: "You are helpful" },
+        {
+          role: "user",
+          content: [{ type: "text", text: "My name is John" }],
+        },
+        { role: "assistant", content: "Hello John!" },
+      ];
+
+      const detection = createPIIResult([
+        // Message 0 (system): no PII
+        [[]],
+        // Message 1 (user multimodal): PII in part 0
+        [[{ entity_type: "PERSON", start: 11, end: 15, score: 0.9 }]],
+        // Message 2 (assistant): PII in part 0
+        [[{ entity_type: "PERSON", start: 6, end: 10, score: 0.9 }]],
+      ]);
  
-      // At minimum, verify that the entity is masked somewhere
-      const fullMasked = maskedContent
-        .filter((p) => p.type === "text")
-        .map((p) => p.text)
-        .join("\n");
+      const { masked } = maskMessages(messages, detection);
  
-      expect(fullMasked).toContain("[[EMAIL_ADDRESS_");
-      expect(fullMasked).not.toContain("email@example.com");
+      expect(masked[0].content).toBe("You are helpful");
+      expect((masked[1].content as ContentPart[])[0].text).toBe("My name is [[PERSON_1]]");
+      expect(masked[2].content).toBe("Hello [[PERSON_1]]!");
      });
    });
  });
diff --git a/src/secrets/patterns/api-keys.ts b/src/secrets/patterns/api-keys.ts

index 438c1cde87d039a0150e9fb8bf8f6b679f1042aa..57e33e81d15d5ab682de9b56ee74e0fa5e875de2 100644 (file)
--- a/src/secrets/patterns/api-keys.ts
+++ b/src/secrets/patterns/api-keys.ts
@@ -1,4 +1,4 @@
-import type { PatternDetector, SecretsMatch, SecretsRedaction } from "./types";
+import type { PatternDetector, SecretLocation, SecretsMatch } from "./types";
  import { detectPattern } from "./utils";
  
  /**
@@ -14,31 +14,31 @@ export const apiKeysDetector: PatternDetector = {
  
    detect(text: string, enabledTypes: Set<string>) {
      const matches: SecretsMatch[] = [];
-    const redactions: SecretsRedaction[] = [];
+    const locations: SecretLocation[] = [];
  
      // OpenAI API keys: sk-... followed by alphanumeric chars
      // Modern format: sk-proj-... or sk-... with 48+ total chars
      if (enabledTypes.has("API_KEY_OPENAI")) {
        const openaiPattern = /sk-[a-zA-Z0-9_-]{45,}/g;
-      detectPattern(text, openaiPattern, "API_KEY_OPENAI", matches, redactions);
+      detectPattern(text, openaiPattern, "API_KEY_OPENAI", matches, locations);
      }
  
      // AWS access keys: AKIA followed by 16 uppercase alphanumeric chars
      if (enabledTypes.has("API_KEY_AWS")) {
        const awsPattern = /AKIA[0-9A-Z]{16}/g;
-      detectPattern(text, awsPattern, "API_KEY_AWS", matches, redactions);
+      detectPattern(text, awsPattern, "API_KEY_AWS", matches, locations);
      }
  
      // GitHub tokens: ghp_, gho_, ghu_, ghs_, ghr_ followed by 36+ alphanumeric chars
      if (enabledTypes.has("API_KEY_GITHUB")) {
        const githubPattern = /gh[pousr]_[a-zA-Z0-9]{36,}/g;
-      detectPattern(text, githubPattern, "API_KEY_GITHUB", matches, redactions);
+      detectPattern(text, githubPattern, "API_KEY_GITHUB", matches, locations);
      }
  
      return {
        detected: matches.length > 0,
        matches,
-      redactions: redactions.length > 0 ? redactions : undefined,
+      locations: locations.length > 0 ? locations : undefined,
      };
    },
  };
diff --git a/src/secrets/patterns/env-vars.ts b/src/secrets/patterns/env-vars.ts

index 3b5c602b2cce7e869358174862e8522a4cef47ef..fa1a7f4a365f6030db0706bece49163250de59ef 100644 (file)
--- a/src/secrets/patterns/env-vars.ts
+++ b/src/secrets/patterns/env-vars.ts
@@ -1,4 +1,4 @@
-import type { PatternDetector, SecretsMatch, SecretsRedaction } from "./types";
+import type { PatternDetector, SecretLocation, SecretsMatch } from "./types";
  import { detectPattern } from "./utils";
  
  /**
@@ -14,21 +14,21 @@ export const envVarsDetector: PatternDetector = {
  
    detect(text: string, enabledTypes: Set<string>) {
      const matches: SecretsMatch[] = [];
-    const redactions: SecretsRedaction[] = [];
+    const locations: SecretLocation[] = [];
  
      // Environment variable password patterns: _PASSWORD or _PWD suffix with value (8+ chars)
      // Case-insensitive for variable name, supports = and : assignment, quoted/unquoted values
      if (enabledTypes.has("ENV_PASSWORD")) {
        const passwordPattern =
          /[A-Za-z_][A-Za-z0-9_]*(?:PASSWORD|_PWD)\s*[=:]\s*['"]?[^\s'"]{8,}['"]?/gi;
-      detectPattern(text, passwordPattern, "ENV_PASSWORD", matches, redactions);
+      detectPattern(text, passwordPattern, "ENV_PASSWORD", matches, locations);
      }
  
      // Environment variable secret patterns: _SECRET suffix with value (8+ chars)
      // Case-insensitive for variable name, supports = and : assignment, quoted/unquoted values
      if (enabledTypes.has("ENV_SECRET")) {
        const secretPattern = /[A-Za-z_][A-Za-z0-9_]*_SECRET\s*[=:]\s*['"]?[^\s'"]{8,}['"]?/gi;
-      detectPattern(text, secretPattern, "ENV_SECRET", matches, redactions);
+      detectPattern(text, secretPattern, "ENV_SECRET", matches, locations);
      }
  
      // Database connection strings with embedded passwords (user:password@host format)
@@ -36,13 +36,13 @@ export const envVarsDetector: PatternDetector = {
      if (enabledTypes.has("CONNECTION_STRING")) {
        const connectionPattern =
          /(?:postgres(?:ql)?|mysql|mariadb|mongodb(?:\+srv)?|redis|amqps?):\/\/[^:]+:[^@\s]+@[^\s'"]+/gi;
-      detectPattern(text, connectionPattern, "CONNECTION_STRING", matches, redactions);
+      detectPattern(text, connectionPattern, "CONNECTION_STRING", matches, locations);
      }
  
      return {
        detected: matches.length > 0,
        matches,
-      redactions: redactions.length > 0 ? redactions : undefined,
+      locations: locations.length > 0 ? locations : undefined,
      };
    },
  };
diff --git a/src/secrets/patterns/private-keys.ts b/src/secrets/patterns/private-keys.ts

index 2325eeab5c143134ce9629cb67182c9e637ea604..1c9442428232fec5343cfbdf6304e119d2a612dd 100644 (file)
--- a/src/secrets/patterns/private-keys.ts
+++ b/src/secrets/patterns/private-keys.ts
@@ -1,8 +1,8 @@
  import type {
    PatternDetector,
+  SecretLocation,
    SecretsDetectionResult,
    SecretsMatch,
-  SecretsRedaction,
  } from "./types";
  import { detectPattern } from "./utils";
  
@@ -18,13 +18,13 @@ export const privateKeysDetector: PatternDetector = {
  
    detect(text: string, enabledTypes: Set<string>): SecretsDetectionResult {
      const matches: SecretsMatch[] = [];
-    const redactions: SecretsRedaction[] = [];
+    const locations: SecretLocation[] = [];
  
      // OpenSSH private key pattern
      if (enabledTypes.has("OPENSSH_PRIVATE_KEY")) {
        const opensshPattern =
          /-----BEGIN OPENSSH PRIVATE KEY-----[\s\S]*?-----END OPENSSH PRIVATE KEY-----/g;
-      detectPattern(text, opensshPattern, "OPENSSH_PRIVATE_KEY", matches, redactions);
+      detectPattern(text, opensshPattern, "OPENSSH_PRIVATE_KEY", matches, locations);
      }
  
      // PEM private key patterns
@@ -34,7 +34,7 @@ export const privateKeysDetector: PatternDetector = {
  
        // RSA PRIVATE KEY
        const rsaPattern = /-----BEGIN RSA PRIVATE KEY-----[\s\S]*?-----END RSA PRIVATE KEY-----/g;
-      detectPattern(text, rsaPattern, "PEM_PRIVATE_KEY", matches, redactions, matchedPositions);
+      detectPattern(text, rsaPattern, "PEM_PRIVATE_KEY", matches, locations, matchedPositions);
  
        // Remove PEM_PRIVATE_KEY from matches to accumulate all PEM types together
        const pemMatch = matches.find((m) => m.type === "PEM_PRIVATE_KEY");
@@ -51,7 +51,7 @@ export const privateKeysDetector: PatternDetector = {
          privateKeyPattern,
          "PEM_PRIVATE_KEY",
          tempMatches,
-        redactions,
+        locations,
          matchedPositions,
        );
        totalPemCount += tempMatches[0]?.count || 0;
@@ -65,7 +65,7 @@ export const privateKeysDetector: PatternDetector = {
          encryptedPattern,
          "PEM_PRIVATE_KEY",
          tempMatches2,
-        redactions,
+        locations,
          matchedPositions,
        );
        totalPemCount += tempMatches2[0]?.count || 0;
@@ -78,7 +78,7 @@ export const privateKeysDetector: PatternDetector = {
      return {
        detected: matches.length > 0,
        matches,
-      redactions: redactions.length > 0 ? redactions : undefined,
+      locations: locations.length > 0 ? locations : undefined,
      };
    },
  };
diff --git a/src/secrets/patterns/tokens.ts b/src/secrets/patterns/tokens.ts

index c5a6c02c4b72001b6de65161d56a9670a474e4ff..ff940b64fde0c6169c041da006372fc457eb152d 100644 (file)
--- a/src/secrets/patterns/tokens.ts
+++ b/src/secrets/patterns/tokens.ts
@@ -1,4 +1,4 @@
-import type { PatternDetector, SecretsMatch, SecretsRedaction } from "./types";
+import type { PatternDetector, SecretLocation, SecretsMatch } from "./types";
  import { detectPattern } from "./utils";
  
  /**
@@ -13,26 +13,26 @@ export const tokensDetector: PatternDetector = {
  
    detect(text: string, enabledTypes: Set<string>) {
      const matches: SecretsMatch[] = [];
-    const redactions: SecretsRedaction[] = [];
+    const locations: SecretLocation[] = [];
  
      // JWT tokens: three base64url segments separated by dots
      // Header starts with eyJ (base64 for {"...), minimum 20 chars per segment
      if (enabledTypes.has("JWT_TOKEN")) {
        const jwtPattern = /eyJ[a-zA-Z0-9_-]{20,}\.eyJ[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}/g;
-      detectPattern(text, jwtPattern, "JWT_TOKEN", matches, redactions);
+      detectPattern(text, jwtPattern, "JWT_TOKEN", matches, locations);
      }
  
      // Bearer tokens in Authorization-style contexts
      // Matches "Bearer " followed by a token (at least 40 chars to reduce placeholder matches)
      if (enabledTypes.has("BEARER_TOKEN")) {
        const bearerPattern = /Bearer\s+[a-zA-Z0-9._-]{40,}/gi;
-      detectPattern(text, bearerPattern, "BEARER_TOKEN", matches, redactions);
+      detectPattern(text, bearerPattern, "BEARER_TOKEN", matches, locations);
      }
  
      return {
        detected: matches.length > 0,
        matches,
-      redactions: redactions.length > 0 ? redactions : undefined,
+      locations: locations.length > 0 ? locations : undefined,
      };
    },
  };
diff --git a/src/secrets/patterns/types.ts b/src/secrets/patterns/types.ts

index 95b6f27482b20f306cae9b4116fd7cc9ba3e5d0d..1c1998554db65dce21e63e4c1eebe072b1969046 100644 (file)
--- a/src/secrets/patterns/types.ts
+++ b/src/secrets/patterns/types.ts
@@ -18,7 +18,10 @@ export interface SecretsMatch {
    count: number;
  }
  
-export interface SecretsRedaction {
+/**
+ * Location of a detected secret in text
+ */
+export interface SecretLocation {
    start: number;
    end: number;
    type: SecretEntityType;
@@ -27,7 +30,18 @@ export interface SecretsRedaction {
  export interface SecretsDetectionResult {
    detected: boolean;
    matches: SecretsMatch[];
-  redactions?: SecretsRedaction[];
+  locations?: SecretLocation[];
+}
+
+/**
+ * Per-message, per-part secrets detection result
+ * Structure: messageLocations[msgIdx][partIdx] = locations for that part
+ */
+export interface MessageSecretsResult {
+  /** Overall flag: whether secrets were detected */
+  detected: boolean;
+  /** Detected secret types, each with its count */
+  matches: SecretsMatch[];
+  /** Per-message, per-part secret locations */
+  messageLocations: SecretLocation[][][];
  }
  
  /**
diff --git a/src/secrets/patterns/utils.ts b/src/secrets/patterns/utils.ts

index 6d124d286d502af5cbfb131c30cf6d93b35325ea..58f14fad98210be82f331f2d0c5a2b45bae3dd74 100644 (file)
--- a/src/secrets/patterns/utils.ts
+++ b/src/secrets/patterns/utils.ts
@@ -1,14 +1,14 @@
-import type { SecretsMatch, SecretsRedaction } from "./types";
+import type { SecretLocation, SecretsMatch } from "./types";
  
  /**
- * Helper to detect secrets matching a pattern and collect matches/redactions
+ * Helper to detect secrets matching a pattern and collect matches/locations
   */
  export function detectPattern(
    text: string,
    pattern: RegExp,
    entityType: string,
    matches: SecretsMatch[],
-  redactions: SecretsRedaction[],
+  locations: SecretLocation[],
    existingPositions?: Set<number>,
  ): number {
    let count = 0;
@@ -19,10 +19,10 @@ export function detectPattern(
  
        count++;
        existingPositions?.add(match.index);
-      redactions.push({
+      locations.push({
          start: match.index,
          end: match.index + match[0].length,
-        type: entityType as SecretsRedaction["type"],
+        type: entityType as SecretLocation["type"],
        });
      }
    }
diff --git a/src/secrets/redact.test.ts b/src/secrets/redact.test.ts

deleted file mode 100644 (file)

index c865705..0000000
--- a/src/secrets/redact.test.ts
+++ /dev/null
@@ -1,368 +0,0 @@
-import { describe, expect, test } from "bun:test";
-import type { SecretsRedaction } from "./detect";
-import {
-  createRedactionContext,
-  flushRedactionBuffer,
-  redactMessagesSecrets,
-  redactSecrets,
-  unredactResponse,
-  unredactSecrets,
-  unredactStreamChunk,
-} from "./redact";
-
-const sampleSecret = "sk-proj-abc123def456ghi789jkl012mno345pqr678stu901vwx";
-
-describe("redactSecrets", () => {
-  test("returns original text when no redactions", () => {
-    const text = "Hello world";
-    const result = redactSecrets(text, []);
-    expect(result.redacted).toBe("Hello world");
-    expect(Object.keys(result.context.mapping)).toHaveLength(0);
-  });
-
-  test("redacts single secret", () => {
-    const text = `My API key is ${sampleSecret}`;
-    const redactions: SecretsRedaction[] = [
-      { start: 14, end: 14 + sampleSecret.length, type: "API_KEY_OPENAI" },
-    ];
-    const result = redactSecrets(text, redactions);
-
-    expect(result.redacted).toBe("My API key is [[SECRET_REDACTED_API_KEY_OPENAI_1]]");
-    expect(result.context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"]).toBe(sampleSecret);
-  });
-
-  test("redacts multiple secrets of same type", () => {
-    const text = `Key1: ${sampleSecret} Key2: ${sampleSecret}`;
-    const redactions: SecretsRedaction[] = [
-      { start: 6, end: 6 + sampleSecret.length, type: "API_KEY_OPENAI" },
-      {
-        start: 6 + sampleSecret.length + 7,
-        end: 6 + sampleSecret.length * 2 + 7,
-        type: "API_KEY_OPENAI",
-      },
-    ];
-    const result = redactSecrets(text, redactions);
-
-    // Same secret value should get same placeholder
-    expect(result.redacted).toBe(
-      "Key1: [[SECRET_REDACTED_API_KEY_OPENAI_1]] Key2: [[SECRET_REDACTED_API_KEY_OPENAI_1]]",
-    );
-    expect(Object.keys(result.context.mapping)).toHaveLength(1);
-  });
-
-  test("redacts multiple secrets of different types", () => {
-    const awsKey = "AKIAIOSFODNN7EXAMPLE";
-    const text = `OpenAI: ${sampleSecret} AWS: ${awsKey}`;
-    const redactions: SecretsRedaction[] = [
-      { start: 8, end: 8 + sampleSecret.length, type: "API_KEY_OPENAI" },
-      {
-        start: 8 + sampleSecret.length + 6,
-        end: 8 + sampleSecret.length + 6 + awsKey.length,
-        type: "API_KEY_AWS",
-      },
-    ];
-    const result = redactSecrets(text, redactions);
-
-    expect(result.redacted).toContain("[[SECRET_REDACTED_API_KEY_OPENAI_1]]");
-    expect(result.redacted).toContain("[[SECRET_REDACTED_API_KEY_AWS_1]]");
-    expect(Object.keys(result.context.mapping)).toHaveLength(2);
-  });
-
-  test("preserves context across multiple calls", () => {
-    const context = createRedactionContext();
-    const text1 = `Key: ${sampleSecret}`;
-    const redactions1: SecretsRedaction[] = [
-      { start: 5, end: 5 + sampleSecret.length, type: "API_KEY_OPENAI" },
-    ];
-    redactSecrets(text1, redactions1, context);
-
-    const anotherSecret = "sk-proj-xyz789abc123def456ghi789jkl012mno345pqr678";
-    const text2 = `Another: ${anotherSecret}`;
-    const redactions2: SecretsRedaction[] = [
-      { start: 9, end: 9 + anotherSecret.length, type: "API_KEY_OPENAI" },
-    ];
-    const result2 = redactSecrets(text2, redactions2, context);
-
-    // Second secret should get incremented counter
-    expect(result2.redacted).toBe("Another: [[SECRET_REDACTED_API_KEY_OPENAI_2]]");
-    expect(Object.keys(context.mapping)).toHaveLength(2);
-  });
-});
-
-describe("unredactSecrets", () => {
-  test("returns original text when no mappings", () => {
-    const context = createRedactionContext();
-    const text = "Hello world";
-    const result = unredactSecrets(text, context);
-    expect(result).toBe("Hello world");
-  });
-
-  test("restores single secret", () => {
-    const context = createRedactionContext();
-    context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
-    const text = "My API key is [[SECRET_REDACTED_API_KEY_OPENAI_1]]";
-    const result = unredactSecrets(text, context);
-
-    expect(result).toBe(`My API key is ${sampleSecret}`);
-  });
-
-  test("restores multiple secrets", () => {
-    const context = createRedactionContext();
-    const awsKey = "AKIAIOSFODNN7EXAMPLE";
-    context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-    context.mapping["[[SECRET_REDACTED_API_KEY_AWS_1]]"] = awsKey;
-
-    const text =
-      "OpenAI: [[SECRET_REDACTED_API_KEY_OPENAI_1]] AWS: [[SECRET_REDACTED_API_KEY_AWS_1]]";
-    const result = unredactSecrets(text, context);
-
-    expect(result).toBe(`OpenAI: ${sampleSecret} AWS: ${awsKey}`);
-  });
-
-  test("restores repeated placeholders", () => {
-    const context = createRedactionContext();
-    context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
-    const text =
-      "Key1: [[SECRET_REDACTED_API_KEY_OPENAI_1]] Key2: [[SECRET_REDACTED_API_KEY_OPENAI_1]]";
-    const result = unredactSecrets(text, context);
-
-    expect(result).toBe(`Key1: ${sampleSecret} Key2: ${sampleSecret}`);
-  });
-});
-
-describe("redact -> unredact roundtrip", () => {
-  test("preserves original data through roundtrip", () => {
-    const originalText = `
-Here are my credentials:
-OpenAI API Key: ${sampleSecret}
-Please store them securely.
-`;
-    const redactions: SecretsRedaction[] = [
-      {
-        start: originalText.indexOf(sampleSecret),
-        end: originalText.indexOf(sampleSecret) + sampleSecret.length,
-        type: "API_KEY_OPENAI",
-      },
-    ];
-
-    const { redacted, context } = redactSecrets(originalText, redactions);
-
-    // Verify secret is not in redacted text
-    expect(redacted).not.toContain(sampleSecret);
-    expect(redacted).toContain("[[SECRET_REDACTED_API_KEY_OPENAI_1]]");
-
-    // Unredact and verify original is restored
-    const restored = unredactSecrets(redacted, context);
-    expect(restored).toBe(originalText);
-  });
-
-  test("handles empty redactions array", () => {
-    const text = "No secrets here";
-    const { redacted, context } = redactSecrets(text, []);
-    const restored = unredactSecrets(redacted, context);
-    expect(restored).toBe(text);
-  });
-});
-
-describe("redactMessagesSecrets", () => {
-  test("redacts secrets in multiple messages", () => {
-    const messages = [
-      { role: "user" as const, content: `My key is ${sampleSecret}` },
-      { role: "assistant" as const, content: "I'll help you with that." },
-    ];
-    const redactionsByMessage: SecretsRedaction[][] = [
-      [{ start: 10, end: 10 + sampleSecret.length, type: "API_KEY_OPENAI" }],
-      [],
-    ];
-
-    const { redacted, context } = redactMessagesSecrets(messages, redactionsByMessage);
-
-    expect(redacted[0].content).toContain("[[SECRET_REDACTED_API_KEY_OPENAI_1]]");
-    expect(redacted[0].content).not.toContain(sampleSecret);
-    expect(redacted[1].content).toBe("I'll help you with that.");
-    expect(Object.keys(context.mapping)).toHaveLength(1);
-  });
-
-  test("preserves message roles", () => {
-    const messages = [
-      { role: "system" as const, content: "You are helpful" },
-      { role: "user" as const, content: `Key: ${sampleSecret}` },
-    ];
-    const redactionsByMessage: SecretsRedaction[][] = [
-      [],
-      [{ start: 5, end: 5 + sampleSecret.length, type: "API_KEY_OPENAI" }],
-    ];
-
-    const { redacted } = redactMessagesSecrets(messages, redactionsByMessage);
-
-    expect(redacted[0].role).toBe("system");
-    expect(redacted[1].role).toBe("user");
-  });
-
-  test("shares context across messages", () => {
-    const messages = [
-      { role: "user" as const, content: `Key1: ${sampleSecret}` },
-      { role: "user" as const, content: `Key2: ${sampleSecret}` },
-    ];
-    const redactionsByMessage: SecretsRedaction[][] = [
-      [{ start: 6, end: 6 + sampleSecret.length, type: "API_KEY_OPENAI" }],
-      [{ start: 6, end: 6 + sampleSecret.length, type: "API_KEY_OPENAI" }],
-    ];
-
-    const { redacted, context } = redactMessagesSecrets(messages, redactionsByMessage);
-
-    // Same secret should get same placeholder across messages
-    expect(redacted[0].content).toBe("Key1: [[SECRET_REDACTED_API_KEY_OPENAI_1]]");
-    expect(redacted[1].content).toBe("Key2: [[SECRET_REDACTED_API_KEY_OPENAI_1]]");
-    expect(Object.keys(context.mapping)).toHaveLength(1);
-  });
-});
-
-describe("streaming unredact", () => {
-  test("unredacts complete placeholder in chunk", () => {
-    const context = createRedactionContext();
-    context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
-    const { output, remainingBuffer } = unredactStreamChunk(
-      "",
-      "Key: [[SECRET_REDACTED_API_KEY_OPENAI_1]] end",
-      context,
-    );
-
-    expect(output).toBe(`Key: ${sampleSecret} end`);
-    expect(remainingBuffer).toBe("");
-  });
-
-  test("buffers partial placeholder", () => {
-    const context = createRedactionContext();
-    context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
-    const { output, remainingBuffer } = unredactStreamChunk("", "Key: [[SECRET_RED", context);
-
-    expect(output).toBe("Key: ");
-    expect(remainingBuffer).toBe("[[SECRET_RED");
-  });
-
-  test("completes buffered placeholder", () => {
-    const context = createRedactionContext();
-    context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
-    const { output, remainingBuffer } = unredactStreamChunk(
-      "[[SECRET_RED",
-      "ACTED_API_KEY_OPENAI_1]] done",
-      context,
-    );
-
-    expect(output).toBe(`${sampleSecret} done`);
-    expect(remainingBuffer).toBe("");
-  });
-
-  test("handles text without placeholders", () => {
-    const context = createRedactionContext();
-
-    const { output, remainingBuffer } = unredactStreamChunk("", "Hello world", context);
-
-    expect(output).toBe("Hello world");
-    expect(remainingBuffer).toBe("");
-  });
-
-  test("flushes remaining buffer", () => {
-    const context = createRedactionContext();
-    context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
-    const result = flushRedactionBuffer("<incomplete", context);
-    expect(result).toBe("<incomplete");
-  });
-
-  test("flushes empty buffer", () => {
-    const context = createRedactionContext();
-    const result = flushRedactionBuffer("", context);
-    expect(result).toBe("");
-  });
-});
-
-describe("unredactResponse", () => {
-  test("unredacts all choices in response", () => {
-    const context = createRedactionContext();
-    context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
-    const response = {
-      id: "test",
-      object: "chat.completion" as const,
-      created: Date.now(),
-      model: "gpt-4",
-      choices: [
-        {
-          index: 0,
-          message: {
-            role: "assistant" as const,
-            content: "Your key is [[SECRET_REDACTED_API_KEY_OPENAI_1]]",
-          },
-          finish_reason: "stop" as const,
-        },
-      ],
-    };
-
-    const result = unredactResponse(response, context);
-    expect(result.choices[0].message.content).toBe(`Your key is ${sampleSecret}`);
-  });
-
-  test("handles multiple choices", () => {
-    const context = createRedactionContext();
-    context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
-    const response = {
-      id: "test",
-      object: "chat.completion" as const,
-      created: Date.now(),
-      model: "gpt-4",
-      choices: [
-        {
-          index: 0,
-          message: {
-            role: "assistant" as const,
-            content: "Choice 1: [[SECRET_REDACTED_API_KEY_OPENAI_1]]",
-          },
-          finish_reason: "stop" as const,
-        },
-        {
-          index: 1,
-          message: {
-            role: "assistant" as const,
-            content: "Choice 2: [[SECRET_REDACTED_API_KEY_OPENAI_1]]",
-          },
-          finish_reason: "stop" as const,
-        },
-      ],
-    };
-
-    const result = unredactResponse(response, context);
-    expect(result.choices[0].message.content).toBe(`Choice 1: ${sampleSecret}`);
-    expect(result.choices[1].message.content).toBe(`Choice 2: ${sampleSecret}`);
-  });
-
-  test("preserves response structure", () => {
-    const context = createRedactionContext();
-    const response = {
-      id: "test-id",
-      object: "chat.completion" as const,
-      created: 12345,
-      model: "gpt-4-turbo",
-      choices: [
-        {
-          index: 0,
-          message: { role: "assistant" as const, content: "Hello" },
-          finish_reason: "stop" as const,
-        },
-      ],
-      usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
-    };
-
-    const result = unredactResponse(response, context);
-    expect(result.id).toBe("test-id");
-    expect(result.model).toBe("gpt-4-turbo");
-    expect(result.usage).toEqual({ prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 });
-  });
-});
diff --git a/src/secrets/redact.ts b/src/secrets/redact.ts

deleted file mode 100644 (file)

index 9526512..0000000
--- a/src/secrets/redact.ts
+++ /dev/null
@@ -1,211 +0,0 @@
-import { findPartialPlaceholderStart, generateSecretPlaceholder } from "../constants/placeholders";
-import type { ChatCompletionResponse, ChatMessage } from "../services/llm-client";
-import { resolveOverlaps } from "../utils/conflict-resolver";
-import { extractTextContent } from "../utils/content";
-import type { SecretsRedaction } from "./detect";
-
-/**
- * Context for tracking secret redaction mappings
- * Similar to MaskingContext for PII but for secrets
- */
-export interface RedactionContext {
-  /** Maps placeholder -> original secret */
-  mapping: Record<string, string>;
-  /** Maps original secret -> placeholder */
-  reverseMapping: Record<string, string>;
-  /** Counter per secret type for sequential numbering */
-  counters: Record<string, number>;
-}
-
-export interface RedactionResult {
-  redacted: string;
-  context: RedactionContext;
-}
-
-/**
- * Creates a new redaction context for a request
- */
-export function createRedactionContext(): RedactionContext {
-  return {
-    mapping: {},
-    reverseMapping: {},
-    counters: {},
-  };
-}
-
-/**
- * Generates a placeholder for a secret type
- *
- * Format: [[SECRET_REDACTED_{TYPE}_{N}]] e.g. [[SECRET_REDACTED_API_KEY_OPENAI_1]]
- */
-function generatePlaceholder(secretType: string, context: RedactionContext): string {
-  const count = (context.counters[secretType] || 0) + 1;
-  context.counters[secretType] = count;
-
-  return generateSecretPlaceholder(secretType, count);
-}
-
-/**
- * Redacts secrets in text, replacing them with placeholders
- *
- * Stores mapping in context for later unredaction.
- * Redactions must be provided sorted by start position descending (as returned by detectSecrets).
- *
- * @param text - The text to redact secrets from
- * @param redactions - Array of redaction positions (sorted by start position descending)
- * @param context - Optional existing context to reuse (for multiple messages)
- */
-export function redactSecrets(
-  text: string,
-  redactions: SecretsRedaction[],
-  context?: RedactionContext,
-): RedactionResult {
-  const ctx = context || createRedactionContext();
-
-  if (redactions.length === 0) {
-    return { redacted: text, context: ctx };
-  }
-
-  // Resolve conflicts between overlapping redactions
-  const resolved = resolveOverlaps(redactions);
-
-  // First pass: sort by start position ascending to assign placeholders in order of appearance
-  const sortedByStart = [...resolved].sort((a, b) => a.start - b.start);
-
-  // Assign placeholders in order of appearance
-  const redactionPlaceholders = new Map<SecretsRedaction, string>();
-  for (const redaction of sortedByStart) {
-    const originalValue = text.slice(redaction.start, redaction.end);
-
-    // Check if we already have a placeholder for this exact value
-    let placeholder = ctx.reverseMapping[originalValue];
-
-    if (!placeholder) {
-      placeholder = generatePlaceholder(redaction.type, ctx);
-      ctx.mapping[placeholder] = originalValue;
-      ctx.reverseMapping[originalValue] = placeholder;
-    }
-
-    redactionPlaceholders.set(redaction, placeholder);
-  }
-
-  // Second pass: replace from end to start to maintain correct string positions
-  const sortedByEnd = [...resolved].sort((a, b) => b.start - a.start);
-
-  let result = text;
-  for (const redaction of sortedByEnd) {
-    const placeholder = redactionPlaceholders.get(redaction)!;
-    result = result.slice(0, redaction.start) + placeholder + result.slice(redaction.end);
-  }
-
-  return { redacted: result, context: ctx };
-}
-
-/**
- * Unredacts text by replacing placeholders with original secrets
- *
- * @param text - Text containing secret placeholders
- * @param context - Redaction context with mappings
- */
-export function unredactSecrets(text: string, context: RedactionContext): string {
-  let result = text;
-
-  // Sort placeholders by length descending to avoid partial replacements
-  const placeholders = Object.keys(context.mapping).sort((a, b) => b.length - a.length);
-
-  for (const placeholder of placeholders) {
-    const originalValue = context.mapping[placeholder];
-    // Replace all occurrences of the placeholder
-    result = result.split(placeholder).join(originalValue);
-  }
-
-  return result;
-}
-
-/**
- * Redacts secrets in multiple messages (for chat completions)
- *
- * @param messages - Chat messages to redact
- * @param redactionsByMessage - Redactions for each message (indexed by message position)
- */
-export function redactMessagesSecrets(
-  messages: ChatMessage[],
-  redactionsByMessage: SecretsRedaction[][],
-): { redacted: ChatMessage[]; context: RedactionContext } {
-  const context = createRedactionContext();
-
-  const redacted = messages.map((msg, i) => {
-    const redactions = redactionsByMessage[i] || [];
-    const text = extractTextContent(msg.content);
-    const { redacted: redactedContent } = redactSecrets(text, redactions, context);
-
-    // If original content was a string, return redacted string
-    // Otherwise return original content (arrays are handled in proxy.ts)
-    return { ...msg, content: typeof msg.content === "string" ? redactedContent : msg.content };
-  });
-
-  return { redacted, context };
-}
-
-/**
- * Streaming unredact helper - processes chunks and unredacts when complete placeholders are found
- *
- * Similar to PII unmasking but for secrets.
- * Returns the unredacted portion and any remaining buffer that might contain partial placeholders.
- */
-export function unredactStreamChunk(
-  buffer: string,
-  newChunk: string,
-  context: RedactionContext,
-): { output: string; remainingBuffer: string } {
-  const combined = buffer + newChunk;
-
-  const partialStart = findPartialPlaceholderStart(combined);
-
-  if (partialStart === -1) {
-    // No partial placeholder, safe to unredact everything
-    return {
-      output: unredactSecrets(combined, context),
-      remainingBuffer: "",
-    };
-  }
-
-  // Partial placeholder detected, buffer it
-  const safeToProcess = combined.slice(0, partialStart);
-  const toBuffer = combined.slice(partialStart);
-
-  return {
-    output: unredactSecrets(safeToProcess, context),
-    remainingBuffer: toBuffer,
-  };
-}
-
-/**
- * Flushes remaining buffer at end of stream
- */
-export function flushRedactionBuffer(buffer: string, context: RedactionContext): string {
-  if (!buffer) return "";
-  return unredactSecrets(buffer, context);
-}
-
-/**
- * Unredacts a chat completion response by replacing placeholders in all choices
- */
-export function unredactResponse(
-  response: ChatCompletionResponse,
-  context: RedactionContext,
-): ChatCompletionResponse {
-  return {
-    ...response,
-    choices: response.choices.map((choice) => ({
-      ...choice,
-      message: {
-        ...choice.message,
-        content:
-          typeof choice.message.content === "string"
-            ? unredactSecrets(choice.message.content, context)
-            : choice.message.content,
-      },
-    })),
-  };
-}
diff --git a/src/services/decision.test.ts b/src/services/decision.test.ts

index 3d4985a8698bfc8c5e6a772a5fae9768c7efdd67..8a87c249ebd7b2e983a8b89d9b6d67e51d25a5f6 100644 (file)
--- a/src/services/decision.test.ts
+++ b/src/services/decision.test.ts
@@ -1,6 +1,6 @@
  import { describe, expect, test } from "bun:test";
-import type { SecretsDetectionResult, SecretsMatch } from "../secrets/detect";
-import type { PIIDetectionResult } from "./pii-detector";
+import type { PIIDetectionResult } from "../pii/detect";
+import type { MessageSecretsResult, SecretsMatch } from "../secrets/detect";
  
  /**
   * Pure routing logic extracted for testing
@@ -8,8 +8,8 @@ import type { PIIDetectionResult } from "./pii-detector";
   */
  function decideRoute(
    piiResult: PIIDetectionResult,
-  secretsResult?: SecretsDetectionResult,
-  secretsAction?: "block" | "redact" | "route_local",
+  secretsResult?: MessageSecretsResult,
+  secretsAction?: "block" | "mask" | "route_local",
  ): { provider: "openai" | "local"; reason: string } {
    // Check for secrets route_local action first (takes precedence)
    if (secretsResult?.detected && secretsAction === "route_local") {
@@ -21,7 +21,7 @@ function decideRoute(
    }
  
    if (piiResult.hasPII) {
-    const entityTypes = [...new Set(piiResult.newEntities.map((e) => e.entity_type))];
+    const entityTypes = [...new Set(piiResult.allEntities.map((e) => e.entity_type))];
      return {
        provider: "local",
        reason: `PII detected: ${entityTypes.join(", ")}`,
@@ -41,7 +41,7 @@ function createPIIResult(
    hasPII: boolean,
    entities: Array<{ entity_type: string }> = [],
  ): PIIDetectionResult {
-  const newEntities = entities.map((e) => ({
+  const allEntities = entities.map((e) => ({
      entity_type: e.entity_type,
      start: 0,
      end: 10,
@@ -50,8 +50,8 @@ function createPIIResult(
  
    return {
      hasPII,
-    newEntities,
-    entitiesByMessage: [newEntities],
+    allEntities,
+    messageEntities: [[allEntities]],
      language: "en",
      languageFallback: false,
      scanTimeMs: 50,
@@ -104,16 +104,16 @@ describe("decideRoute", () => {
  });
  
  /**
- * Helper to create a mock SecretsDetectionResult
+ * Helper to create a mock MessageSecretsResult
   */
  function createSecretsResult(
    detected: boolean,
    matches: SecretsMatch[] = [],
-): SecretsDetectionResult {
+): MessageSecretsResult {
    return {
      detected,
      matches,
-    redactions: matches.map((m, i) => ({ start: i * 100, end: i * 100 + 50, type: m.type })),
+    messageLocations: [],
    };
  }
  
@@ -175,14 +175,14 @@ describe("decideRoute with secrets", () => {
      });
    });
  
-  describe("with redact action", () => {
-    test("ignores secrets detection for routing (redacted before PII check)", () => {
+  describe("with mask action", () => {
+    test("ignores secrets detection for routing (masked before PII check)", () => {
        const piiResult = createPIIResult(false);
        const secretsResult = createSecretsResult(true, [{ type: "BEARER_TOKEN", count: 1 }]);
  
-      const result = decideRoute(piiResult, secretsResult, "redact");
+      const result = decideRoute(piiResult, secretsResult, "mask");
  
-      // With redact action, we route based on PII, not secrets
+      // With mask action, we route based on PII, not secrets
        expect(result.provider).toBe("openai");
        expect(result.reason).toBe("No PII detected");
      });
diff --git a/src/services/decision.ts b/src/services/decision.ts

index da1bdbff60e1d178211b260c54b54d1c84b815ff..a6bef8b255f13eb5921c3c669c0c09ac69654285 100644 (file)
--- a/src/services/decision.ts
+++ b/src/services/decision.ts
@@ -1,8 +1,9 @@
  import { type Config, getConfig } from "../config";
-import type { SecretsDetectionResult } from "../secrets/detect";
-import { type ChatMessage, LLMClient } from "../services/llm-client";
-import { createMaskingContext, type MaskingContext, maskMessages } from "../services/masking";
-import { getPIIDetector, type PIIDetectionResult } from "../services/pii-detector";
+import { getPIIDetector, type PIIDetectionResult } from "../pii/detect";
+import { createMaskingContext, maskMessages } from "../pii/mask";
+import type { MessageSecretsResult } from "../secrets/detect";
+import type { PlaceholderContext } from "../utils/message-transform";
+import { type ChatMessage, LLMClient } from "./llm-client";
  
  /**
   * Routing decision result for route mode
@@ -23,7 +24,7 @@ export interface MaskDecision {
    reason: string;
    piiResult: PIIDetectionResult;
    maskedMessages: ChatMessage[];
-  maskingContext: MaskingContext;
+  maskingContext: PlaceholderContext;
  }
  
  export type RoutingDecision = RouteDecision | MaskDecision;
@@ -61,13 +62,13 @@ export class Router {
     */
    async decide(
      messages: ChatMessage[],
-    secretsResult?: SecretsDetectionResult,
+    secretsResult?: MessageSecretsResult,
    ): Promise<RoutingDecision> {
      const detector = getPIIDetector();
      const piiResult = await detector.analyzeMessages(messages);
  
      if (this.config.mode === "mask") {
-      return await this.decideMask(messages, piiResult);
+      return this.decideMask(messages, piiResult);
      }
  
      return this.decideRoute(piiResult, secretsResult);
@@ -82,7 +83,7 @@ export class Router {
     */
    private decideRoute(
      piiResult: PIIDetectionResult,
-    secretsResult?: SecretsDetectionResult,
+    secretsResult?: MessageSecretsResult,
    ): RouteDecision {
      // Check for secrets route_local action first (takes precedence)
      if (secretsResult?.detected && this.config.secrets_detection.action === "route_local") {
@@ -97,7 +98,7 @@ export class Router {
  
      // Route based on PII detection
      if (piiResult.hasPII) {
-      const entityTypes = [...new Set(piiResult.newEntities.map((e) => e.entity_type))];
+      const entityTypes = [...new Set(piiResult.allEntities.map((e) => e.entity_type))];
        return {
          mode: "route",
          provider: "local",
@@ -115,10 +116,7 @@ export class Router {
      };
    }
  
-  private async decideMask(
-    messages: ChatMessage[],
-    piiResult: PIIDetectionResult,
-  ): Promise<MaskDecision> {
+  private decideMask(messages: ChatMessage[], piiResult: PIIDetectionResult): MaskDecision {
      if (!piiResult.hasPII) {
        return {
          mode: "mask",
@@ -130,9 +128,9 @@ export class Router {
        };
      }
  
-    const { masked, context } = maskMessages(messages, piiResult.entitiesByMessage);
+    const { masked, context } = maskMessages(messages, piiResult);
  
-    const entityTypes = [...new Set(piiResult.newEntities.map((e) => e.entity_type))];
+    const entityTypes = [...new Set(piiResult.allEntities.map((e) => e.entity_type))];
  
      return {
        mode: "mask",
diff --git a/src/services/language-detector.ts b/src/services/language-detector.ts

index 44424320a049ae96cc522c77d81f29ab370feae9..dd485411fd383ea636ba1eff30f0ccbdc93063e8 100644 (file)
--- a/src/services/language-detector.ts
+++ b/src/services/language-detector.ts
@@ -1,32 +1,8 @@
  import eld from "eld/small";
  import { getConfig } from "../config";
+import type { SupportedLanguage } from "../constants/languages";
  
-// All 24 spaCy languages with trained pipelines
-export type SupportedLanguage =
-  | "ca"
-  | "zh"
-  | "hr"
-  | "da"
-  | "nl"
-  | "en"
-  | "fi"
-  | "fr"
-  | "de"
-  | "el"
-  | "it"
-  | "ja"
-  | "ko"
-  | "lt"
-  | "mk"
-  | "nb"
-  | "pl"
-  | "pt"
-  | "ro"
-  | "ru"
-  | "sl"
-  | "es"
-  | "sv"
-  | "uk";
+export type { SupportedLanguage } from "../constants/languages";
  
  export interface LanguageDetectionResult {
    language: SupportedLanguage;
diff --git a/src/services/masking.test.ts b/src/services/masking.test.ts

deleted file mode 100644 (file)

index bbfa58e..0000000
--- a/src/services/masking.test.ts
+++ /dev/null
@@ -1,656 +0,0 @@
-import { describe, expect, test } from "bun:test";
-import type { MaskingConfig } from "../config";
-import type { ChatMessage } from "./llm-client";
-import {
-  createMaskingContext,
-  flushStreamBuffer,
-  mask,
-  maskMessages,
-  unmask,
-  unmaskResponse,
-  unmaskStreamChunk,
-} from "./masking";
-import type { PIIEntity } from "./pii-detector";
-
-const defaultConfig: MaskingConfig = {
-  show_markers: false,
-  marker_text: "[protected]",
-};
-
-const configWithMarkers: MaskingConfig = {
-  show_markers: true,
-  marker_text: "[protected]",
-};
-
-describe("mask", () => {
-  test("returns original text when no entities", () => {
-    const result = mask("Hello world", []);
-    expect(result.masked).toBe("Hello world");
-    expect(Object.keys(result.context.mapping)).toHaveLength(0);
-  });
-
-  test("masks single email entity", () => {
-    // "Contact: john@example.com please"
-    //           ^9             ^25
-    const entities: PIIEntity[] = [{ entity_type: "EMAIL_ADDRESS", start: 9, end: 25, score: 1.0 }];
-
-    const result = mask("Contact: john@example.com please", entities);
-
-    expect(result.masked).toBe("Contact: [[EMAIL_ADDRESS_1]] please");
-    expect(result.context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("john@example.com");
-  });
-
-  test("masks multiple entities of same type", () => {
-    const text = "Emails: a@b.com and c@d.com";
-    const entities: PIIEntity[] = [
-      { entity_type: "EMAIL_ADDRESS", start: 8, end: 15, score: 1.0 },
-      { entity_type: "EMAIL_ADDRESS", start: 20, end: 27, score: 1.0 },
-    ];
-
-    const result = mask(text, entities);
-
-    expect(result.masked).toBe("Emails: [[EMAIL_ADDRESS_1]] and [[EMAIL_ADDRESS_2]]");
-    expect(result.context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("a@b.com");
-    expect(result.context.mapping["[[EMAIL_ADDRESS_2]]"]).toBe("c@d.com");
-  });
-
-  test("masks multiple entity types", () => {
-    const text = "Hans Müller: hans@firma.de";
-    const entities: PIIEntity[] = [
-      { entity_type: "PERSON", start: 0, end: 11, score: 0.9 },
-      { entity_type: "EMAIL_ADDRESS", start: 13, end: 26, score: 1.0 },
-    ];
-
-    const result = mask(text, entities);
-
-    expect(result.masked).toBe("[[PERSON_1]]: [[EMAIL_ADDRESS_1]]");
-    expect(result.context.mapping["[[PERSON_1]]"]).toBe("Hans Müller");
-    expect(result.context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("hans@firma.de");
-  });
-
-  test("reuses placeholder for duplicate values", () => {
-    const text = "a@b.com and again a@b.com";
-    const entities: PIIEntity[] = [
-      { entity_type: "EMAIL_ADDRESS", start: 0, end: 7, score: 1.0 },
-      { entity_type: "EMAIL_ADDRESS", start: 18, end: 25, score: 1.0 },
-    ];
-
-    const result = mask(text, entities);
-
-    // Same value should get same placeholder
-    expect(result.masked).toBe("[[EMAIL_ADDRESS_1]] and again [[EMAIL_ADDRESS_1]]");
-    expect(Object.keys(result.context.mapping)).toHaveLength(1);
-  });
-
-  test("handles adjacent entities", () => {
-    const text = "HansMüller";
-    const entities: PIIEntity[] = [
-      { entity_type: "PERSON", start: 0, end: 4, score: 0.9 },
-      { entity_type: "PERSON", start: 4, end: 10, score: 0.9 },
-    ];
-
-    const result = mask(text, entities);
-
-    expect(result.masked).toBe("[[PERSON_1]][[PERSON_2]]");
-  });
-
-  test("preserves context across calls", () => {
-    const context = createMaskingContext();
-
-    const result1 = mask(
-      "Email: a@b.com",
-      [{ entity_type: "EMAIL_ADDRESS", start: 7, end: 14, score: 1.0 }],
-      context,
-    );
-
-    expect(result1.masked).toBe("Email: [[EMAIL_ADDRESS_1]]");
-
-    const result2 = mask(
-      "Another: c@d.com",
-      [{ entity_type: "EMAIL_ADDRESS", start: 9, end: 16, score: 1.0 }],
-      context,
-    );
-
-    // Should continue numbering
-    expect(result2.masked).toBe("Another: [[EMAIL_ADDRESS_2]]");
-    expect(context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("a@b.com");
-    expect(context.mapping["[[EMAIL_ADDRESS_2]]"]).toBe("c@d.com");
-  });
-});
-
-describe("unmask", () => {
-  test("returns original text when no mappings", () => {
-    const context = createMaskingContext();
-    const result = unmask("Hello world", context, defaultConfig);
-    expect(result).toBe("Hello world");
-  });
-
-  test("restores single placeholder", () => {
-    const context = createMaskingContext();
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "john@example.com";
-
-    const result = unmask("Reply to [[EMAIL_ADDRESS_1]]", context, defaultConfig);
-    expect(result).toBe("Reply to john@example.com");
-  });
-
-  test("restores multiple placeholders", () => {
-    const context = createMaskingContext();
-    context.mapping["[[PERSON_1]]"] = "Hans Müller";
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "hans@firma.de";
-
-    const result = unmask(
-      "Hello [[PERSON_1]], your email [[EMAIL_ADDRESS_1]] is confirmed",
-      context,
-      defaultConfig,
-    );
-    expect(result).toBe("Hello Hans Müller, your email hans@firma.de is confirmed");
-  });
-
-  test("restores repeated placeholders", () => {
-    const context = createMaskingContext();
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-
-    const result = unmask("[[EMAIL_ADDRESS_1]] and [[EMAIL_ADDRESS_1]]", context, defaultConfig);
-    expect(result).toBe("test@test.com and test@test.com");
-  });
-
-  test("adds markers when configured", () => {
-    const context = createMaskingContext();
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "john@example.com";
-
-    const result = unmask("Email: [[EMAIL_ADDRESS_1]]", context, configWithMarkers);
-    expect(result).toBe("Email: [protected]john@example.com");
-  });
-
-  test("handles partial placeholder (no match)", () => {
-    const context = createMaskingContext();
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-
-    const result = unmask("Text with [[EMAIL_ADDRESS_2]]", context, defaultConfig);
-    expect(result).toBe("Text with [[EMAIL_ADDRESS_2]]"); // No match, unchanged
-  });
-});
-
-describe("mask -> unmask roundtrip", () => {
-  test("preserves original data through roundtrip", () => {
-    const originalText = "Contact Hans Müller at hans@firma.de or call +49123456789";
-    const entities: PIIEntity[] = [
-      { entity_type: "PERSON", start: 8, end: 19, score: 0.9 },
-      { entity_type: "EMAIL_ADDRESS", start: 23, end: 36, score: 1.0 },
-      { entity_type: "PHONE_NUMBER", start: 45, end: 57, score: 0.95 },
-    ];
-
-    const { masked, context } = mask(originalText, entities);
-
-    // Verify masking worked
-    expect(masked).not.toContain("Hans Müller");
-    expect(masked).not.toContain("hans@firma.de");
-    expect(masked).not.toContain("+49123456789");
-
-    // Simulate LLM response that echoes placeholders
-    const llmResponse = `I see your contact info: ${masked.match(/\[\[PERSON_1\]\]/)?.[0]}, email ${masked.match(/\[\[EMAIL_ADDRESS_1\]\]/)?.[0]}`;
-
-    const unmasked = unmask(llmResponse, context, defaultConfig);
-
-    expect(unmasked).toContain("Hans Müller");
-    expect(unmasked).toContain("hans@firma.de");
-  });
-
-  test("handles empty entities array", () => {
-    const text = "No PII here";
-    const { masked, context } = mask(text, []);
-    const unmasked = unmask(masked, context, defaultConfig);
-
-    expect(unmasked).toBe(text);
-  });
-});
-
-describe("maskMessages", () => {
-  test("masks multiple messages", () => {
-    const messages: ChatMessage[] = [
-      { role: "user", content: "My email is test@example.com" },
-      { role: "assistant", content: "Got it" },
-      { role: "user", content: "Also john@test.com" },
-    ];
-
-    const entitiesByMessage: PIIEntity[][] = [
-      [{ entity_type: "EMAIL_ADDRESS", start: 12, end: 28, score: 1.0 }],
-      [],
-      [{ entity_type: "EMAIL_ADDRESS", start: 5, end: 18, score: 1.0 }],
-    ];
-
-    const { masked, context } = maskMessages(messages, entitiesByMessage);
-
-    expect(masked[0].content).toBe("My email is [[EMAIL_ADDRESS_1]]");
-    expect(masked[1].content).toBe("Got it");
-    expect(masked[2].content).toBe("Also [[EMAIL_ADDRESS_2]]");
-
-    expect(context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("test@example.com");
-    expect(context.mapping["[[EMAIL_ADDRESS_2]]"]).toBe("john@test.com");
-  });
-
-  test("preserves message roles", () => {
-    const messages: ChatMessage[] = [
-      { role: "system", content: "You are helpful" },
-      { role: "user", content: "Hi" },
-    ];
-
-    const { masked } = maskMessages(messages, [[], []]);
-
-    expect(masked[0].role).toBe("system");
-    expect(masked[1].role).toBe("user");
-  });
-});
-
-describe("streaming unmask", () => {
-  test("unmasks complete placeholder in chunk", () => {
-    const context = createMaskingContext();
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-
-    const { output, remainingBuffer } = unmaskStreamChunk(
-      "",
-      "Hello [[EMAIL_ADDRESS_1]]!",
-      context,
-      defaultConfig,
-    );
-
-    expect(output).toBe("Hello test@test.com!");
-    expect(remainingBuffer).toBe("");
-  });
-
-  test("buffers partial placeholder", () => {
-    const context = createMaskingContext();
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-
-    const { output, remainingBuffer } = unmaskStreamChunk(
-      "",
-      "Hello [[EMAIL_ADD",
-      context,
-      defaultConfig,
-    );
-
-    expect(output).toBe("Hello ");
-    expect(remainingBuffer).toBe("[[EMAIL_ADD");
-  });
-
-  test("completes buffered placeholder", () => {
-    const context = createMaskingContext();
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-
-    const { output, remainingBuffer } = unmaskStreamChunk(
-      "[[EMAIL_ADD",
-      "RESS_1]] there",
-      context,
-      defaultConfig,
-    );
-
-    expect(output).toBe("test@test.com there");
-    expect(remainingBuffer).toBe("");
-  });
-
-  test("handles text without placeholders", () => {
-    const context = createMaskingContext();
-
-    const { output, remainingBuffer } = unmaskStreamChunk(
-      "",
-      "Just normal text",
-      context,
-      defaultConfig,
-    );
-
-    expect(output).toBe("Just normal text");
-    expect(remainingBuffer).toBe("");
-  });
-
-  test("flushes remaining buffer", () => {
-    const context = createMaskingContext();
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-
-    // Partial that never completes
-    const flushed = flushStreamBuffer("[[EMAIL_ADD", context, defaultConfig);
-
-    // Should return as-is since no complete placeholder
-    expect(flushed).toBe("[[EMAIL_ADD");
-  });
-});
-
-describe("unmaskResponse", () => {
-  test("unmasks all choices in response", () => {
-    const context = createMaskingContext();
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-    context.mapping["[[PERSON_1]]"] = "John Doe";
-
-    const response = {
-      id: "chatcmpl-123",
-      object: "chat.completion" as const,
-      created: 1234567890,
-      model: "gpt-4",
-      choices: [
-        {
-          index: 0,
-          message: {
-            role: "assistant" as const,
-            content: "Contact [[PERSON_1]] at [[EMAIL_ADDRESS_1]]",
-          },
-          finish_reason: "stop" as const,
-        },
-      ],
-      usage: {
-        prompt_tokens: 10,
-        completion_tokens: 20,
-        total_tokens: 30,
-      },
-    };
-
-    const result = unmaskResponse(response, context, defaultConfig);
-
-    expect(result.choices[0].message.content).toBe("Contact John Doe at test@test.com");
-    expect(result.id).toBe("chatcmpl-123");
-    expect(result.model).toBe("gpt-4");
-  });
-
-  test("handles multiple choices", () => {
-    const context = createMaskingContext();
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "a@b.com";
-
-    const response = {
-      id: "chatcmpl-456",
-      object: "chat.completion" as const,
-      created: 1234567890,
-      model: "gpt-4",
-      choices: [
-        {
-          index: 0,
-          message: { role: "assistant" as const, content: "First: [[EMAIL_ADDRESS_1]]" },
-          finish_reason: "stop" as const,
-        },
-        {
-          index: 1,
-          message: { role: "assistant" as const, content: "Second: [[EMAIL_ADDRESS_1]]" },
-          finish_reason: "stop" as const,
-        },
-      ],
-    };
-
-    const result = unmaskResponse(response, context, defaultConfig);
-
-    expect(result.choices[0].message.content).toBe("First: a@b.com");
-    expect(result.choices[1].message.content).toBe("Second: a@b.com");
-  });
-
-  test("preserves response structure", () => {
-    const context = createMaskingContext();
-    const response = {
-      id: "test-id",
-      object: "chat.completion" as const,
-      created: 999,
-      model: "test-model",
-      choices: [
-        {
-          index: 0,
-          message: { role: "assistant" as const, content: "No placeholders" },
-          finish_reason: null,
-        },
-      ],
-      usage: { prompt_tokens: 5, completion_tokens: 10, total_tokens: 15 },
-    };
-
-    const result = unmaskResponse(response, context, defaultConfig);
-
-    expect(result.id).toBe("test-id");
-    expect(result.object).toBe("chat.completion");
-    expect(result.created).toBe(999);
-    expect(result.model).toBe("test-model");
-    expect(result.usage).toEqual({ prompt_tokens: 5, completion_tokens: 10, total_tokens: 15 });
-  });
-});
-
-describe("edge cases", () => {
-  test("handles unicode in masked text", () => {
-    const text = "Kontakt: François Müller";
-    const entities: PIIEntity[] = [{ entity_type: "PERSON", start: 9, end: 24, score: 0.9 }];
-
-    const { masked, context } = mask(text, entities);
-    expect(masked).toBe("Kontakt: [[PERSON_1]]");
-
-    const unmasked = unmask(masked, context, defaultConfig);
-    expect(unmasked).toBe("Kontakt: François Müller");
-  });
-
-  test("handles empty text", () => {
-    const { masked, context } = mask("", []);
-    expect(masked).toBe("");
-    expect(unmask("", context, defaultConfig)).toBe("");
-  });
-
-  test("handles placeholder-like text that is not a real placeholder", () => {
-    const context = createMaskingContext();
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-
-    const result = unmask("Use [[UNKNOWN_1]] format", context, defaultConfig);
-    expect(result).toBe("Use [[UNKNOWN_1]] format");
-  });
-});
-
-describe("HTML context handling (issue #36)", () => {
-  test("unmasks placeholders in HTML without encoding issues", () => {
-    // With [[]] format, placeholders are not affected by HTML encoding
-    const context = createMaskingContext();
-    context.mapping["[[PERSON_1]]"] = "Dr. Sarah Chen";
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "sarah.chen@hospital.org";
-
-    // [[]] brackets don't get HTML-encoded, so they work directly
-    const htmlResponse = `<p>Contact [[PERSON_1]] at [[EMAIL_ADDRESS_1]]</p>`;
-
-    const result = unmask(htmlResponse, context, defaultConfig);
-
-    expect(result).toBe("<p>Contact Dr. Sarah Chen at sarah.chen@hospital.org</p>");
-  });
-
-  test("unmasks placeholders in HTML title attributes", () => {
-    const context = createMaskingContext();
-    context.mapping["[[PERSON_1]]"] = "Jane Smith";
-
-    // [[]] works in HTML attributes without encoding
-    const htmlWithAttr = `<span title="Contact [[PERSON_1]]">Click here</span>`;
-
-    const result = unmask(htmlWithAttr, context, defaultConfig);
-
-    expect(result).toBe(`<span title="Contact Jane Smith">Click here</span>`);
-  });
-
-  test("unmasks placeholders in mailto links", () => {
-    const context = createMaskingContext();
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@example.com";
-
-    const mailtoHtml = `<a href="mailto:[[EMAIL_ADDRESS_1]]">Send email</a>`;
-
-    const result = unmask(mailtoHtml, context, defaultConfig);
-
-    expect(result).toBe(`<a href="mailto:test@example.com">Send email</a>`);
-  });
-
-  test("handles multiple occurrences of same placeholder in HTML", () => {
-    const context = createMaskingContext();
-    context.mapping["[[PERSON_1]]"] = "Alice";
-
-    const response = `<p>[[PERSON_1]] said hello.</p><p>[[PERSON_1]] waved goodbye.</p>`;
-
-    const result = unmask(response, context, defaultConfig);
-
-    expect(result).toBe("<p>Alice said hello.</p><p>Alice waved goodbye.</p>");
-  });
-
-  test("works with complex HTML structures", () => {
-    const context = createMaskingContext();
-    context.mapping["[[PERSON_1]]"] = "Dr. Sarah Chen";
-    context.mapping["[[EMAIL_ADDRESS_1]]"] = "sarah@hospital.org";
-    context.mapping["[[PHONE_NUMBER_1]]"] = "+1-555-0123";
-
-    const complexHtml = `
-      <div class="profile">
-        <h1>[[PERSON_1]]</h1>
-        <a href="mailto:[[EMAIL_ADDRESS_1]]">[[EMAIL_ADDRESS_1]]</a>
-        <span data-phone="[[PHONE_NUMBER_1]]">Call: [[PHONE_NUMBER_1]]</span>
-      </div>
-    `;
-
-    const result = unmask(complexHtml, context, defaultConfig);
-
-    expect(result).toContain("Dr. Sarah Chen");
-    expect(result).toContain("sarah@hospital.org");
-    expect(result).toContain("+1-555-0123");
-    expect(result).not.toContain("[[");
-    expect(result).not.toContain("]]");
-  });
-});
-
-describe("streaming with [[]] placeholders (issue #36)", () => {
-  test("handles complete placeholder in chunk", () => {
-    const context = createMaskingContext();
-    context.mapping["[[PERSON_1]]"] = "John Doe";
-
-    const { output, remainingBuffer } = unmaskStreamChunk(
-      "",
-      "Hello [[PERSON_1]]!",
-      context,
-      defaultConfig,
-    );
-
-    expect(output).toBe("Hello John Doe!");
-    expect(remainingBuffer).toBe("");
-  });
-
-  test("buffers partial placeholder at end of chunk", () => {
-    const context = createMaskingContext();
-    context.mapping["[[PERSON_1]]"] = "John Doe";
-
-    // Partial placeholder at end: [[PERS
-    const { output, remainingBuffer } = unmaskStreamChunk(
-      "",
-      "Hello [[PERS",
-      context,
-      defaultConfig,
-    );
-
-    expect(output).toBe("Hello ");
-    expect(remainingBuffer).toBe("[[PERS");
-  });
-
-  test("completes buffered placeholder across chunks", () => {
-    const context = createMaskingContext();
-    context.mapping["[[PERSON_1]]"] = "John Doe";
-
-    const { output, remainingBuffer } = unmaskStreamChunk(
-      "[[PERS",
-      "ON_1]] there",
-      context,
-      defaultConfig,
-    );
-
-    expect(output).toBe("John Doe there");
-    expect(remainingBuffer).toBe("");
-  });
-
-  test("handles placeholder split at closing brackets", () => {
-    const context = createMaskingContext();
-    context.mapping["[[PERSON_1]]"] = "John Doe";
-
-    // First chunk ends with incomplete closing
-    const result1 = unmaskStreamChunk("", "Hello [[PERSON_1]", context, defaultConfig);
-    expect(result1.output).toBe("Hello ");
-    expect(result1.remainingBuffer).toBe("[[PERSON_1]");
-
-    // Second chunk completes it
-    const result2 = unmaskStreamChunk(result1.remainingBuffer, "] world", context, defaultConfig);
-    expect(result2.output).toBe("John Doe world");
-    expect(result2.remainingBuffer).toBe("");
-  });
-});
-
-describe("overlapping entities (issue #33)", () => {
-  test("handles overlapping entities with same start - keeps longer", () => {
-    // Bug: Presidio returns both "Eric" and "Eric's" as separate PERSON entities
-    const text = "Given Eric's feedback";
-    const entities: PIIEntity[] = [
-      { entity_type: "PERSON", start: 6, end: 10, score: 0.85 }, // "Eric"
-      { entity_type: "PERSON", start: 6, end: 12, score: 0.8 }, // "Eric's"
-    ];
-
-    const { masked, context } = mask(text, entities);
-
-    // Longer span wins when same start position
-    expect(masked).toBe("Given [[PERSON_1]] feedback");
-    expect(context.mapping["[[PERSON_1]]"]).toBe("Eric's");
-  });
-
-  test("handles partially overlapping entities of same type - merges them", () => {
-    const text = "Contact John Smith Jones please";
-    const entities: PIIEntity[] = [
-      { entity_type: "PERSON", start: 8, end: 18, score: 0.9 }, // "John Smith"
-      { entity_type: "PERSON", start: 13, end: 25, score: 0.7 }, // "Smith Jones"
-    ];
-
-    const { masked } = mask(text, entities);
-
-    // Presidio behavior: same-type overlapping entities are MERGED
-    // Merged entity spans 8-25 ("John Smith Jones"), keeps highest score
-    expect(masked).toBe("Contact [[PERSON_1]]please");
-  });
-
-  test("handles nested entities - keeps outer (starts first)", () => {
-    const text = "Dr. John Smith is here";
-    const entities: PIIEntity[] = [
-      { entity_type: "PERSON", start: 0, end: 14, score: 0.9 }, // "Dr. John Smith"
-      { entity_type: "PERSON", start: 4, end: 8, score: 0.85 }, // "John"
-    ];
-
-    const { masked } = mask(text, entities);
-
-    expect(masked).toBe("[[PERSON_1]] is here");
-  });
-
-  test("keeps adjacent non-overlapping entities", () => {
-    const text = "HansMüller";
-    const entities: PIIEntity[] = [
-      { entity_type: "PERSON", start: 0, end: 4, score: 0.9 }, // "Hans"
-      { entity_type: "PERSON", start: 4, end: 10, score: 0.9 }, // "Müller"
-    ];
-
-    const { masked } = mask(text, entities);
-
-    expect(masked).toBe("[[PERSON_1]][[PERSON_2]]");
-  });
-
-  test("handles multiple independent overlap groups", () => {
-    const text = "Laura Smith met Eric's friend Bob Jones Jr";
-    const entities: PIIEntity[] = [
-      // Group 1: same start - longer wins
-      { entity_type: "PERSON", start: 0, end: 5, score: 0.85 }, // "Laura"
-      { entity_type: "PERSON", start: 0, end: 11, score: 0.9 }, // "Laura Smith"
-      // Group 2: same start - longer wins
-      { entity_type: "PERSON", start: 16, end: 20, score: 0.85 }, // "Eric"
-      { entity_type: "PERSON", start: 16, end: 22, score: 0.8 }, // "Eric's"
-      // Group 3: same start - longer wins
-      { entity_type: "PERSON", start: 30, end: 33, score: 0.7 }, // "Bob"
-      { entity_type: "PERSON", start: 30, end: 42, score: 0.9 }, // "Bob Jones Jr"
-    ];
-
-    const { masked } = mask(text, entities);
-
-    expect(masked).toBe("[[PERSON_1]] met [[PERSON_2]] friend [[PERSON_3]]");
-  });
-
-  test("entity consistency - same value gets same placeholder", () => {
-    const text = "Eric met Eric again";
-    const entities: PIIEntity[] = [
-      { entity_type: "PERSON", start: 0, end: 4, score: 0.9 }, // "Eric"
-      { entity_type: "PERSON", start: 9, end: 13, score: 0.9 }, // "Eric"
-    ];
-
-    const { masked, context } = mask(text, entities);
-
-    expect(masked).toBe("[[PERSON_1]] met [[PERSON_1]] again");
-    expect(Object.keys(context.mapping)).toHaveLength(1);
-  });
-});
diff --git a/src/services/masking.ts b/src/services/masking.ts

deleted file mode 100644 (file)

index b9dbeb9..0000000
--- a/src/services/masking.ts
+++ /dev/null
@@ -1,247 +0,0 @@
-import type { MaskingConfig } from "../config";
-import {
-  findPartialPlaceholderStart,
-  generatePlaceholder as generatePlaceholderFromFormat,
-  PII_PLACEHOLDER_FORMAT,
-} from "../constants/placeholders";
-import { resolveConflicts } from "../utils/conflict-resolver";
-import { extractTextContent } from "../utils/content";
-import type { ChatCompletionResponse, ChatMessage } from "./llm-client";
-import type { PIIEntity } from "./pii-detector";
-
-export interface MaskingContext {
-  mapping: Record<string, string>;
-  reverseMapping: Record<string, string>;
-  counters: Record<string, number>;
-}
-
-export interface MaskResult {
-  masked: string;
-  context: MaskingContext;
-}
-
-/**
- * Creates a new masking context for a request
- */
-export function createMaskingContext(): MaskingContext {
-  return {
-    mapping: {},
-    reverseMapping: {},
-    counters: {},
-  };
-}
-
-/**
- * Generates a placeholder for a PII entity type
- */
-function generatePlaceholder(entityType: string, context: MaskingContext): string {
-  const count = (context.counters[entityType] || 0) + 1;
-  context.counters[entityType] = count;
-
-  return generatePlaceholderFromFormat(PII_PLACEHOLDER_FORMAT, entityType, count);
-}
-
-/**
- * Masks PII entities in text, replacing them with placeholders
- *
- * First assigns placeholders in order of appearance (start position ascending),
- * then replaces from end to start to maintain correct string positions
- */
-export function mask(text: string, entities: PIIEntity[], context?: MaskingContext): MaskResult {
-  const ctx = context || createMaskingContext();
-
-  if (entities.length === 0) {
-    return { masked: text, context: ctx };
-  }
-
-  // Resolve conflicts between overlapping entities using Presidio's algorithm
-  // Presidio can return overlapping entities (e.g., "Eric" and "Eric's")
-  const resolved = resolveConflicts(entities);
-
-  // First pass: sort by start position ascending to assign placeholders in order
-  const sortedByStart = [...resolved].sort((a, b) => a.start - b.start);
-
-  // Assign placeholders in order of appearance
-  const entityPlaceholders = new Map<PIIEntity, string>();
-  for (const entity of sortedByStart) {
-    const originalValue = text.slice(entity.start, entity.end);
-
-    // Check if we already have a placeholder for this exact value
-    let placeholder = ctx.reverseMapping[originalValue];
-
-    if (!placeholder) {
-      placeholder = generatePlaceholder(entity.entity_type, ctx);
-      ctx.mapping[placeholder] = originalValue;
-      ctx.reverseMapping[originalValue] = placeholder;
-    }
-
-    entityPlaceholders.set(entity, placeholder);
-  }
-
-  // Second pass: sort by start position descending for replacement
-  // This ensures string indices remain valid as we replace
-  const sortedByEnd = [...resolved].sort((a, b) => b.start - a.start);
-
-  let result = text;
-  for (const entity of sortedByEnd) {
-    const placeholder = entityPlaceholders.get(entity)!;
-    result = result.slice(0, entity.start) + placeholder + result.slice(entity.end);
-  }
-
-  return { masked: result, context: ctx };
-}
-
-/**
- * Unmasks text by replacing placeholders with original values
- *
- * Optionally adds markers to indicate protected content
- */
-export function unmask(text: string, context: MaskingContext, config: MaskingConfig): string {
-  let result = text;
-
-  // Sort placeholders by length descending to avoid partial replacements
-  const placeholders = Object.keys(context.mapping).sort((a, b) => b.length - a.length);
-
-  for (const placeholder of placeholders) {
-    const originalValue = context.mapping[placeholder];
-    const replacement = config.show_markers
-      ? `${config.marker_text}${originalValue}`
-      : originalValue;
-
-    // Replace all occurrences of the placeholder
-    result = result.split(placeholder).join(replacement);
-  }
-
-  return result;
-}
-
-/**
- * Masks multiple messages (for chat completions)
- */
-export function maskMessages(
-  messages: ChatMessage[],
-  entitiesByMessage: PIIEntity[][],
-): { masked: ChatMessage[]; context: MaskingContext } {
-  const context = createMaskingContext();
-
-  const masked = messages.map((msg, i) => {
-    const entities = entitiesByMessage[i] || [];
-
-    // Handle array content (multimodal messages)
-    if (Array.isArray(msg.content)) {
-      if (entities.length === 0) {
-        return msg;
-      }
-
-      // Track offset position within the concatenated text for this message
-      // (matches how extractTextContent joins parts with \n)
-      let partOffset = 0;
-
-      // Mask only text parts with proper offset tracking
-      const maskedContent = msg.content.map((part) => {
-        if (part.type === "text" && typeof part.text === "string") {
-          const partLength = part.text.length;
-
-          // Find entities that apply to this specific part
-          const partEntities = entities
-            .filter((e) => e.start < partOffset + partLength && e.end > partOffset)
-            .map((e) => ({
-              ...e,
-              start: Math.max(0, e.start - partOffset),
-              end: Math.min(partLength, e.end - partOffset),
-            }));
-
-          if (partEntities.length > 0) {
-            const { masked: maskedText } = mask(part.text, partEntities, context);
-            partOffset += partLength + 1; // +1 for \n separator
-            return { ...part, text: maskedText };
-          }
-
-          partOffset += partLength + 1; // +1 for \n separator
-          return part;
-        }
-        return part;
-      });
-
-      return { ...msg, content: maskedContent };
-    }
-
-    // Handle string content (text-only messages)
-    const text = extractTextContent(msg.content);
-    const { masked: maskedContent } = mask(text, entities, context);
-
-    // If original content was a string, return masked string
-    // Otherwise return original content
-    return { ...msg, content: typeof msg.content === "string" ? maskedContent : msg.content };
-  });
-
-  return { masked, context };
-}
-
-/**
- * Streaming unmask helper - processes chunks and unmasks when complete placeholders are found
- *
- * Returns the unmasked portion and any remaining buffer that might contain partial placeholders
- */
-export function unmaskStreamChunk(
-  buffer: string,
-  newChunk: string,
-  context: MaskingContext,
-  config: MaskingConfig,
-): { output: string; remainingBuffer: string } {
-  const combined = buffer + newChunk;
-
-  const partialStart = findPartialPlaceholderStart(combined);
-
-  if (partialStart === -1) {
-    // No partial placeholder, safe to unmask everything
-    return {
-      output: unmask(combined, context, config),
-      remainingBuffer: "",
-    };
-  }
-
-  // Partial placeholder detected, buffer it
-  const safeToProcess = combined.slice(0, partialStart);
-  const toBuffer = combined.slice(partialStart);
-
-  return {
-    output: unmask(safeToProcess, context, config),
-    remainingBuffer: toBuffer,
-  };
-}
-
-/**
- * Flushes remaining buffer at end of stream
- */
-export function flushStreamBuffer(
-  buffer: string,
-  context: MaskingContext,
-  config: MaskingConfig,
-): string {
-  if (!buffer) return "";
-  return unmask(buffer, context, config);
-}
-
-/**
- * Unmasks a chat completion response by replacing placeholders in all choices
- */
-export function unmaskResponse(
-  response: ChatCompletionResponse,
-  context: MaskingContext,
-  config: MaskingConfig,
-): ChatCompletionResponse {
-  return {
-    ...response,
-    choices: response.choices.map((choice) => ({
-      ...choice,
-      message: {
-        ...choice.message,
-        content:
-          typeof choice.message.content === "string"
-            ? unmask(choice.message.content, context, config)
-            : choice.message.content,
-      },
-    })),
-  };
-}
diff --git a/src/services/stream-transformer.test.ts b/src/services/stream-transformer.test.ts

index b79cc56682b361ef3c4173851b59fbc6d6018fca..eba9922db2d75f66ed58f2e40fea32568a286a42 100644 (file)
--- a/src/services/stream-transformer.test.ts
+++ b/src/services/stream-transformer.test.ts
@@ -1,6 +1,6 @@
  import { describe, expect, test } from "bun:test";
  import type { MaskingConfig } from "../config";
-import { createMaskingContext } from "./masking";
+import { createMaskingContext } from "../pii/mask";
  import { createUnmaskingStream } from "./stream-transformer";
  
  const defaultConfig: MaskingConfig = {
diff --git a/src/services/stream-transformer.ts b/src/services/stream-transformer.ts

index aa69b2fef82b6cb862259245df470a55bb106dc1..ea64adde9a59ba049a867d9c033f55ac43c8ab33 100644 (file)
--- a/src/services/stream-transformer.ts
+++ b/src/services/stream-transformer.ts
@@ -1,10 +1,7 @@
  import type { MaskingConfig } from "../config";
-import {
-  flushRedactionBuffer,
-  type RedactionContext,
-  unredactStreamChunk,
-} from "../secrets/redact";
-import { flushStreamBuffer, type MaskingContext, unmaskStreamChunk } from "./masking";
+import { flushMaskingBuffer, unmaskStreamChunk } from "../pii/mask";
+import { flushSecretsMaskingBuffer, unmaskSecretsStreamChunk } from "../secrets/mask";
+import type { PlaceholderContext } from "../utils/message-transform";
  
  /**
   * Creates a transform stream that unmasks SSE content
@@ -12,13 +9,13 @@ import { flushStreamBuffer, type MaskingContext, unmaskStreamChunk } from "./mas
   * Processes Server-Sent Events (SSE) chunks, buffering partial placeholders
   * and unmasking complete ones before forwarding to the client.
   *
- * Supports both PII unmasking and secret unredaction, or either alone.
+ * Supports both PII unmasking and secrets unmasking, or either alone.
   */
  export function createUnmaskingStream(
    source: ReadableStream<Uint8Array>,
-  piiContext: MaskingContext | undefined,
+  piiContext: PlaceholderContext | undefined,
    config: MaskingConfig,
-  secretsContext?: RedactionContext,
+  secretsContext?: PlaceholderContext,
  ): ReadableStream<Uint8Array> {
    const decoder = new TextDecoder();
    const encoder = new TextEncoder();
@@ -39,14 +36,14 @@ export function createUnmaskingStream(
  
              // Flush PII buffer first
              if (piiBuffer && piiContext) {
-              flushed = flushStreamBuffer(piiBuffer, piiContext, config);
+              flushed = flushMaskingBuffer(piiBuffer, piiContext, config);
              } else if (piiBuffer) {
                flushed = piiBuffer;
              }
  
              // Then flush secrets buffer
              if (secretsBuffer && secretsContext) {
-              flushed += flushRedactionBuffer(secretsBuffer, secretsContext);
+              flushed += flushSecretsMaskingBuffer(secretsBuffer, secretsContext);
              } else if (secretsBuffer) {
                flushed += secretsBuffer;
              }
@@ -101,9 +98,9 @@ export function createUnmaskingStream(
                      processedContent = output;
                    }
  
-                  // Then unredact secrets if context provided
+                  // Then unmask secrets if context provided
                    if (secretsContext && processedContent) {
-                    const { output, remainingBuffer } = unredactStreamChunk(
+                    const { output, remainingBuffer } = unmaskSecretsStreamChunk(
                        secretsBuffer,
                        processedContent,
                        secretsContext,
diff --git a/src/test-utils/detection-results.ts b/src/test-utils/detection-results.ts

new file mode 100644 (file)

index 0000000..30cfc48
--- /dev/null
+++ b/src/test-utils/detection-results.ts
@@ -0,0 +1,51 @@
+/**
+ * Test utilities for creating detection results
+ *
+ * Shared helpers for creating PIIDetectionResult and MessageSecretsResult
+ * from per-message, per-part data in tests.
+ */
+
+import type { SupportedLanguage } from "../constants/languages";
+import type { PIIDetectionResult, PIIEntity } from "../pii/detect";
+import type { MessageSecretsResult, SecretLocation } from "../secrets/detect";
+
+/**
+ * Builds a PIIDetectionResult for tests from nested per-part entities
+ *
+ * The flattened entity list and the `hasPII` flag are derived from the nested
+ * input; every other field falls back to a neutral default unless overridden.
+ *
+ * @param messageEntities - Entities indexed as messageEntities[msgIdx][partIdx]
+ * @param options - Optional overrides; defaults are language "en", no fallback, 0 ms scan time
+ * @returns A result whose `allEntities` is the flattened form of `messageEntities`
+ */
+export function createPIIResult(
+  messageEntities: PIIEntity[][][],
+  options: {
+    language?: SupportedLanguage;
+    languageFallback?: boolean;
+    detectedLanguage?: string;
+    scanTimeMs?: number;
+  } = {},
+): PIIDetectionResult {
+  const { language, languageFallback, detectedLanguage, scanTimeMs } = options;
+
+  // Collapse messages -> parts -> entities into one flat list
+  const allEntities = messageEntities.flatMap((parts) => parts.flat());
+  const hasPII = allEntities.length !== 0;
+
+  return {
+    hasPII,
+    messageEntities,
+    allEntities,
+    scanTimeMs: scanTimeMs ?? 0,
+    language: language ?? "en",
+    languageFallback: languageFallback ?? false,
+    detectedLanguage,
+  };
+}
+
+/**
+ * Builds a MessageSecretsResult for tests from nested per-part locations
+ *
+ * `detected` is true as soon as any part holds at least one location.
+ * `matches` is always left empty here.
+ *
+ * @param messageLocations - Locations indexed as messageLocations[msgIdx][partIdx]
+ * @returns A result wrapping the given locations
+ */
+export function createSecretsResult(messageLocations: SecretLocation[][][]): MessageSecretsResult {
+  const everyPartEmpty = messageLocations.every((parts) =>
+    parts.every((locations) => locations.length === 0),
+  );
+
+  return {
+    detected: !everyPartEmpty,
+    matches: [], // Matches are aggregated separately in real detection
+    messageLocations,
+  };
+}
diff --git a/src/utils/conflict-resolver.ts b/src/utils/conflict-resolver.ts

index 5c2c09071b0530d20e924222cce92782ecf70fb8..1ae4148fa1a5d8cfea38f5788e75ad80a9e350d1 100644 (file)
--- a/src/utils/conflict-resolver.ts
+++ b/src/utils/conflict-resolver.ts
@@ -1,19 +1,23 @@
  // Conflict resolution based on Microsoft Presidio's logic
  // https://github.com/microsoft/presidio/blob/main/presidio-anonymizer/presidio_anonymizer/anonymizer_engine.py
  
-export interface EntityWithScore {
+/**
+ * Base interface for items with position (used by both PII and secrets)
+ */
+export interface Span {
    start: number;
    end: number;
-  score: number;
-  entity_type: string;
  }
  
-interface Interval {
-  start: number;
-  end: number;
+/**
+ * Extended interface for PII entities with confidence scores
+ */
+export interface EntityWithScore extends Span {
+  score: number;
+  entity_type: string;
  }
  
-function overlaps(a: Interval, b: Interval): boolean {
+function overlaps(a: Span, b: Span): boolean {
    return a.start < b.end && b.start < a.end;
  }
  
@@ -28,7 +32,7 @@ function groupBy<T>(items: T[], keyFn: (item: T) => string): Map<string, T[]> {
    return groups;
  }
  
-function mergeOverlapping<T extends Interval>(intervals: T[], merge: (a: T, b: T) => T): T[] {
+function mergeOverlapping<T extends Span>(intervals: T[], merge: (a: T, b: T) => T): T[] {
    if (intervals.length <= 1) return [...intervals];
  
    const sorted = [...intervals].sort((a, b) => a.start - b.start);
@@ -92,11 +96,14 @@ export function resolveConflicts<T extends EntityWithScore>(entities: T[]): T[]
    return removeConflicting(afterMerge);
  }
  
-/** For secrets without scores. Keeps non-overlapping, longer wins ties. */
-export function resolveOverlaps<T extends Interval>(entities: T[]): T[] {
-  if (entities.length <= 1) return [...entities];
+/**
+ * Simple conflict resolution for items without scores (secrets)
+ * Keeps non-overlapping spans, longer span wins ties.
+ */
+export function resolveOverlaps<T extends Span>(items: T[]): T[] {
+  if (items.length <= 1) return [...items];
  
-  const sorted = [...entities].sort((a, b) => {
+  const sorted = [...items].sort((a, b) => {
      if (a.start !== b.start) return a.start - b.start;
      return b.end - b.start - (a.end - a.start);
    });
diff --git a/src/utils/content.test.ts b/src/utils/content.test.ts

index 0040d0749ea41cee8f8adfbc2b1f5803c475a63b..3b60a2b113cd2bb603a2357645971dc5fecf92de 100644 (file)
--- a/src/utils/content.test.ts
+++ b/src/utils/content.test.ts
@@ -1,5 +1,5 @@
  import { describe, expect, test } from "bun:test";
-import { type ContentPart, extractTextContent, hasTextContent } from "./content";
+import { type ContentPart, extractTextContent } from "./content";
  
  describe("extractTextContent", () => {
    test("returns empty string for null", () => {
@@ -47,33 +47,3 @@ describe("extractTextContent", () => {
      expect(extractTextContent([])).toBe("");
    });
  });
-
-describe("hasTextContent", () => {
-  test("returns false for null", () => {
-    expect(hasTextContent(null)).toBe(false);
-  });
-
-  test("returns false for undefined", () => {
-    expect(hasTextContent(undefined)).toBe(false);
-  });
-
-  test("returns true for non-empty string", () => {
-    expect(hasTextContent("Hello")).toBe(true);
-  });
-
-  test("returns false for empty string", () => {
-    expect(hasTextContent("")).toBe(false);
-  });
-
-  test("returns true for array with text", () => {
-    const content: ContentPart[] = [{ type: "text", text: "Hello" }];
-    expect(hasTextContent(content)).toBe(true);
-  });
-
-  test("returns false for array without text", () => {
-    const content: ContentPart[] = [
-      { type: "image_url", image_url: { url: "https://example.com/image.jpg" } },
-    ];
-    expect(hasTextContent(content)).toBe(false);
-  });
-});
diff --git a/src/utils/content.ts b/src/utils/content.ts

index 5e5298365b04050bcfdbb09dad6c6a693fcf39d1..7a256dea627e1bd686ac0664cb92a07feb908eee 100644 (file)
--- a/src/utils/content.ts
+++ b/src/utils/content.ts
@@ -67,13 +67,3 @@ export function extractTextContent(content: MessageContent): string {
    // Unexpected type - return empty string
    return "";
  }
-
-/**
- * Checks if content has any text
- *
- * @param content - The message content to check
- * @returns true if content contains text, false otherwise
- */
-export function hasTextContent(content: MessageContent): boolean {
-  return extractTextContent(content).length > 0;
-}
diff --git a/src/utils/message-transform.test.ts b/src/utils/message-transform.test.ts

new file mode 100644 (file)

index 0000000..0c68abc
--- /dev/null
+++ b/src/utils/message-transform.test.ts
@@ -0,0 +1,555 @@
+import { describe, expect, test } from "bun:test";
+import type { ChatMessage } from "../services/llm-client";
+import type { Span } from "./conflict-resolver";
+import {
+  createPlaceholderContext,
+  flushBuffer,
+  incrementAndGenerate,
+  processStreamChunk,
+  replaceWithPlaceholders,
+  restorePlaceholders,
+  restoreResponsePlaceholders,
+  transformMessagesPerPart,
+} from "./message-transform";
+
+/**
+ * Test-only placeholder formatter producing [[TYPE_N]]
+ */
+function testPlaceholder(type: string, count: number): string {
+  return "[[" + type + "_" + count + "]]";
+}
+
+/**
+ * Test-only conflict resolver: walks spans in start order and drops any span
+ * that begins before the previously kept span ends (first wins)
+ */
+function simpleResolveConflicts<T extends Span>(items: T[]): T[] {
+  const byStart = [...items].sort((left, right) => left.start - right.start);
+  const kept: T[] = [];
+
+  for (const candidate of byStart) {
+    const previous = kept[kept.length - 1];
+    // The first span is always kept; later ones only if they start at or after the last kept end
+    if (previous === undefined || candidate.start >= previous.end) {
+      kept.push(candidate);
+    }
+  }
+
+  return kept;
+}
+
+/**
+ * Minimal span used as input in these tests; `type` is the value the tests
+ * return from their getType callback.
+ */
+interface TestItem extends Span {
+  type: string;
+}
+
+describe("createPlaceholderContext", () => {
+  test("creates empty context", () => {
+    const ctx = createPlaceholderContext();
+    expect(ctx.mapping).toEqual({});
+    expect(ctx.reverseMapping).toEqual({});
+    expect(ctx.counters).toEqual({});
+  });
+});
+
+describe("incrementAndGenerate", () => {
+  test("increments counter and generates placeholder", () => {
+    const ctx = createPlaceholderContext();
+
+    const p1 = incrementAndGenerate("EMAIL", ctx, testPlaceholder);
+    expect(p1).toBe("[[EMAIL_1]]");
+    expect(ctx.counters.EMAIL).toBe(1);
+
+    const p2 = incrementAndGenerate("EMAIL", ctx, testPlaceholder);
+    expect(p2).toBe("[[EMAIL_2]]");
+    expect(ctx.counters.EMAIL).toBe(2);
+  });
+
+  test("tracks different types separately", () => {
+    const ctx = createPlaceholderContext();
+
+    incrementAndGenerate("EMAIL", ctx, testPlaceholder);
+    incrementAndGenerate("PERSON", ctx, testPlaceholder);
+    incrementAndGenerate("EMAIL", ctx, testPlaceholder);
+
+    expect(ctx.counters.EMAIL).toBe(2);
+    expect(ctx.counters.PERSON).toBe(1);
+  });
+});
+
+describe("replaceWithPlaceholders", () => {
+  test("returns original text when no items", () => {
+    const ctx = createPlaceholderContext();
+    const result = replaceWithPlaceholders(
+      "Hello world",
+      [],
+      ctx,
+      (item: TestItem) => item.type,
+      (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+      simpleResolveConflicts,
+    );
+    expect(result).toBe("Hello world");
+  });
+
+  test("replaces single item", () => {
+    const ctx = createPlaceholderContext();
+    const items: TestItem[] = [{ start: 0, end: 5, type: "WORD" }];
+
+    const result = replaceWithPlaceholders(
+      "Hello world",
+      items,
+      ctx,
+      (item) => item.type,
+      (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+      simpleResolveConflicts,
+    );
+
+    expect(result).toBe("[[WORD_1]] world");
+    expect(ctx.mapping["[[WORD_1]]"]).toBe("Hello");
+  });
+
+  test("replaces multiple items", () => {
+    const ctx = createPlaceholderContext();
+    const items: TestItem[] = [
+      { start: 0, end: 5, type: "WORD" },
+      { start: 6, end: 11, type: "WORD" },
+    ];
+
+    const result = replaceWithPlaceholders(
+      "Hello world",
+      items,
+      ctx,
+      (item) => item.type,
+      (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+      simpleResolveConflicts,
+    );
+
+    expect(result).toBe("[[WORD_1]] [[WORD_2]]");
+  });
+
+  test("reuses placeholder for duplicate values", () => {
+    const ctx = createPlaceholderContext();
+    const items: TestItem[] = [
+      { start: 0, end: 3, type: "WORD" },
+      { start: 8, end: 11, type: "WORD" },
+    ];
+
+    const result = replaceWithPlaceholders(
+      "foo bar foo",
+      items,
+      ctx,
+      (item) => item.type,
+      (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+      simpleResolveConflicts,
+    );
+
+    expect(result).toBe("[[WORD_1]] bar [[WORD_1]]");
+    expect(Object.keys(ctx.mapping)).toHaveLength(1);
+  });
+
+  test("preserves context across calls", () => {
+    const ctx = createPlaceholderContext();
+
+    replaceWithPlaceholders(
+      "Hello",
+      [{ start: 0, end: 5, type: "WORD" }],
+      ctx,
+      (item: TestItem) => item.type,
+      (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+      simpleResolveConflicts,
+    );
+
+    const result = replaceWithPlaceholders(
+      "World",
+      [{ start: 0, end: 5, type: "WORD" }],
+      ctx,
+      (item: TestItem) => item.type,
+      (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+      simpleResolveConflicts,
+    );
+
+    expect(result).toBe("[[WORD_2]]");
+    expect(ctx.mapping["[[WORD_1]]"]).toBe("Hello");
+    expect(ctx.mapping["[[WORD_2]]"]).toBe("World");
+  });
+
+  test("handles adjacent items", () => {
+    const ctx = createPlaceholderContext();
+    const items: TestItem[] = [
+      { start: 0, end: 2, type: "A" },
+      { start: 2, end: 4, type: "B" },
+    ];
+
+    const result = replaceWithPlaceholders(
+      "AABB",
+      items,
+      ctx,
+      (item) => item.type,
+      (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+      simpleResolveConflicts,
+    );
+
+    expect(result).toBe("[[A_1]][[B_1]]");
+  });
+});
+
+describe("restorePlaceholders", () => {
+  test("returns original text when no mappings", () => {
+    const ctx = createPlaceholderContext();
+    expect(restorePlaceholders("Hello world", ctx)).toBe("Hello world");
+  });
+
+  test("restores single placeholder", () => {
+    const ctx = createPlaceholderContext();
+    ctx.mapping["[[WORD_1]]"] = "Hello";
+
+    expect(restorePlaceholders("[[WORD_1]] world", ctx)).toBe("Hello world");
+  });
+
+  test("restores multiple placeholders", () => {
+    const ctx = createPlaceholderContext();
+    ctx.mapping["[[A_1]]"] = "Hello";
+    ctx.mapping["[[B_1]]"] = "World";
+
+    expect(restorePlaceholders("[[A_1]] [[B_1]]", ctx)).toBe("Hello World");
+  });
+
+  test("restores repeated placeholders", () => {
+    const ctx = createPlaceholderContext();
+    ctx.mapping["[[X_1]]"] = "test";
+
+    expect(restorePlaceholders("[[X_1]] and [[X_1]]", ctx)).toBe("test and test");
+  });
+
+  test("applies formatValue function", () => {
+    const ctx = createPlaceholderContext();
+    ctx.mapping["[[X_1]]"] = "secret";
+
+    const result = restorePlaceholders("Value: [[X_1]]", ctx, (v) => `[REDACTED:${v}]`);
+    expect(result).toBe("Value: [REDACTED:secret]");
+  });
+
+  test("leaves unknown placeholders unchanged", () => {
+    const ctx = createPlaceholderContext();
+    ctx.mapping["[[X_1]]"] = "known";
+
+    expect(restorePlaceholders("[[X_1]] [[Y_1]]", ctx)).toBe("known [[Y_1]]");
+  });
+});
+
+describe("replace -> restore roundtrip", () => {
+  test("preserves original data", () => {
+    const ctx = createPlaceholderContext();
+    const original = "Contact john@example.com or call +1234567890";
+    const items: TestItem[] = [
+      { start: 8, end: 24, type: "EMAIL" },
+      { start: 33, end: 44, type: "PHONE" },
+    ];
+
+    const replaced = replaceWithPlaceholders(
+      original,
+      items,
+      ctx,
+      (item) => item.type,
+      (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+      simpleResolveConflicts,
+    );
+
+    expect(replaced).not.toContain("john@example.com");
+    expect(replaced).not.toContain("+1234567890");
+
+    const restored = restorePlaceholders(replaced, ctx);
+    expect(restored).toBe(original);
+  });
+});
+
+describe("transformMessagesPerPart", () => {
+  test("transforms string content", () => {
+    const messages: ChatMessage[] = [{ role: "user", content: "Hello world" }];
+    const perPartData = [[[{ marker: true }]]];
+
+    const result = transformMessagesPerPart(
+      messages,
+      perPartData,
+      (text, data) => (data.length > 0 ? text.toUpperCase() : text),
+      {},
+    );
+
+    expect(result[0].content).toBe("HELLO WORLD");
+  });
+
+  test("skips messages without data", () => {
+    const messages: ChatMessage[] = [
+      { role: "user", content: "Keep this" },
+      { role: "assistant", content: "And this" },
+    ];
+    const perPartData = [[[]], [[]]];
+
+    const result = transformMessagesPerPart(
+      messages,
+      perPartData,
+      (text) => text.toUpperCase(),
+      {},
+    );
+
+    expect(result[0].content).toBe("Keep this");
+    expect(result[1].content).toBe("And this");
+  });
+
+  test("transforms array content (multimodal)", () => {
+    const messages: ChatMessage[] = [
+      {
+        role: "user",
+        content: [
+          { type: "text", text: "Hello" },
+          { type: "image_url", image_url: { url: "https://example.com/img.jpg" } },
+        ],
+      },
+    ];
+    const perPartData = [[[{ marker: true }], []]];
+
+    const result = transformMessagesPerPart(
+      messages,
+      perPartData,
+      (text, data) => (data.length > 0 ? text.toUpperCase() : text),
+      {},
+    );
+
+    const content = result[0].content as Array<{ type: string; text?: string }>;
+    expect(content[0].text).toBe("HELLO");
+    expect(content[1].type).toBe("image_url");
+  });
+
+  test("preserves message roles", () => {
+    const messages: ChatMessage[] = [
+      { role: "system", content: "sys" },
+      { role: "user", content: "usr" },
+      { role: "assistant", content: "ast" },
+    ];
+    const perPartData = [[[]], [[]], [[]]];
+
+    const result = transformMessagesPerPart(messages, perPartData, (t) => t, {});
+
+    expect(result[0].role).toBe("system");
+    expect(result[1].role).toBe("user");
+    expect(result[2].role).toBe("assistant");
+  });
+
+  test("passes context to transform function", () => {
+    const messages: ChatMessage[] = [{ role: "user", content: "test" }];
+    const perPartData = [[[{ id: 1 }]]];
+    const ctx = { prefix: ">> " };
+
+    const result = transformMessagesPerPart(
+      messages,
+      perPartData,
+      (text, _data, context: { prefix: string }) => context.prefix + text,
+      ctx,
+    );
+
+    expect(result[0].content).toBe(">> test");
+  });
+});
+
+describe("restoreResponsePlaceholders", () => {
+  test("restores placeholders in response choices", () => {
+    const ctx = createPlaceholderContext();
+    ctx.mapping["[[X_1]]"] = "secret";
+
+    const response = {
+      id: "test",
+      choices: [{ message: { content: "Value: [[X_1]]" } }],
+    };
+
+    const result = restoreResponsePlaceholders(response, ctx);
+    expect(result.choices[0].message.content).toBe("Value: secret");
+  });
+
+  test("handles multiple choices", () => {
+    const ctx = createPlaceholderContext();
+    ctx.mapping["[[X_1]]"] = "val";
+
+    const response = {
+      id: "test",
+      choices: [{ message: { content: "A: [[X_1]]" } }, { message: { content: "B: [[X_1]]" } }],
+    };
+
+    const result = restoreResponsePlaceholders(response, ctx);
+    expect(result.choices[0].message.content).toBe("A: val");
+    expect(result.choices[1].message.content).toBe("B: val");
+  });
+
+  test("preserves response structure", () => {
+    const ctx = createPlaceholderContext();
+    const response = {
+      id: "resp-123",
+      model: "test-model",
+      choices: [{ message: { content: "text" } }],
+      usage: { tokens: 10 },
+    };
+
+    const result = restoreResponsePlaceholders(response, ctx);
+    expect(result.id).toBe("resp-123");
+    expect(result.model).toBe("test-model");
+    expect(result.usage).toEqual({ tokens: 10 });
+  });
+
+  test("applies formatValue function", () => {
+    const ctx = createPlaceholderContext();
+    ctx.mapping["[[X_1]]"] = "secret";
+
+    const response = {
+      id: "test",
+      choices: [{ message: { content: "[[X_1]]" } }],
+    };
+
+    const result = restoreResponsePlaceholders(response, ctx, (v) => `<${v}>`);
+    expect(result.choices[0].message.content).toBe("<secret>");
+  });
+
+  test("handles non-string content", () => {
+    const ctx = createPlaceholderContext();
+    const response = {
+      id: "test",
+      choices: [{ message: { content: null } }],
+    };
+
+    const result = restoreResponsePlaceholders(response, ctx);
+    expect(result.choices[0].message.content).toBe(null);
+  });
+});
+
+describe("processStreamChunk", () => {
+  test("processes complete text without placeholders", () => {
+    const ctx = createPlaceholderContext();
+    const restore = (text: string) => text;
+
+    const { output, remainingBuffer } = processStreamChunk("", "Hello world", ctx, restore);
+
+    expect(output).toBe("Hello world");
+    expect(remainingBuffer).toBe("");
+  });
+
+  test("processes complete placeholder", () => {
+    const ctx = createPlaceholderContext();
+    ctx.mapping["[[X_1]]"] = "secret";
+
+    const { output, remainingBuffer } = processStreamChunk(
+      "",
+      "Value: [[X_1]]!",
+      ctx,
+      restorePlaceholders,
+    );
+
+    expect(output).toBe("Value: secret!");
+    expect(remainingBuffer).toBe("");
+  });
+
+  test("buffers partial placeholder at end", () => {
+    const ctx = createPlaceholderContext();
+
+    const { output, remainingBuffer } = processStreamChunk(
+      "",
+      "Hello [[PARTIAL",
+      ctx,
+      restorePlaceholders,
+    );
+
+    expect(output).toBe("Hello ");
+    expect(remainingBuffer).toBe("[[PARTIAL");
+  });
+
+  test("completes buffered placeholder", () => {
+    const ctx = createPlaceholderContext();
+    ctx.mapping["[[X_1]]"] = "done";
+
+    const { output, remainingBuffer } = processStreamChunk(
+      "[[X_",
+      "1]] end",
+      ctx,
+      restorePlaceholders,
+    );
+
+    expect(output).toBe("done end");
+    expect(remainingBuffer).toBe("");
+  });
+
+  test("handles multiple chunks with partial placeholders", () => {
+    const ctx = createPlaceholderContext();
+    ctx.mapping["[[LONG_PLACEHOLDER_1]]"] = "value";
+
+    // First chunk
+    const r1 = processStreamChunk("", "Start [[LONG_", ctx, restorePlaceholders);
+    expect(r1.output).toBe("Start ");
+    expect(r1.remainingBuffer).toBe("[[LONG_");
+
+    // Second chunk
+    const r2 = processStreamChunk(r1.remainingBuffer, "PLACEHOLDER_", ctx, restorePlaceholders);
+    expect(r2.output).toBe("");
+    expect(r2.remainingBuffer).toBe("[[LONG_PLACEHOLDER_");
+
+    // Third chunk completes it
+    const r3 = processStreamChunk(r2.remainingBuffer, "1]] end", ctx, restorePlaceholders);
+    expect(r3.output).toBe("value end");
+    expect(r3.remainingBuffer).toBe("");
+  });
+});
+
+describe("flushBuffer", () => {
+  test("returns empty string for empty buffer", () => {
+    const ctx = createPlaceholderContext();
+    expect(flushBuffer("", ctx, restorePlaceholders)).toBe("");
+  });
+
+  test("flushes incomplete placeholder as-is", () => {
+    const ctx = createPlaceholderContext();
+    expect(flushBuffer("[[INCOMPLETE", ctx, restorePlaceholders)).toBe("[[INCOMPLETE");
+  });
+
+  test("restores complete placeholder in buffer", () => {
+    const ctx = createPlaceholderContext();
+    ctx.mapping["[[X_1]]"] = "final";
+
+    expect(flushBuffer("[[X_1]]", ctx, restorePlaceholders)).toBe("final");
+  });
+});
+
+describe("edge cases", () => {
+  test("handles unicode text", () => {
+    const ctx = createPlaceholderContext();
+    const items: TestItem[] = [{ start: 0, end: 11, type: "NAME" }];
+
+    const result = replaceWithPlaceholders(
+      "François Müller",
+      items,
+      ctx,
+      (item) => item.type,
+      (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+      simpleResolveConflicts,
+    );
+
+    // Note: JS string indices are UTF-16 code units
+    expect(ctx.mapping["[[NAME_1]]"]).toBe("François Mü");
+
+    const restored = restorePlaceholders(result, ctx);
+    expect(restored).toContain("François Mü");
+  });
+
+  test("handles empty text", () => {
+    const ctx = createPlaceholderContext();
+    const result = replaceWithPlaceholders(
+      "",
+      [],
+      ctx,
+      (item: TestItem) => item.type,
+      (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+      simpleResolveConflicts,
+    );
+    expect(result).toBe("");
+  });
+
+  test("handles placeholder-like text that is not in mapping", () => {
+    const ctx = createPlaceholderContext();
+    ctx.mapping["[[A_1]]"] = "known";
+
+    const result = restorePlaceholders("[[A_1]] and [[B_1]]", ctx);
+    expect(result).toBe("known and [[B_1]]");
+  });
+});
diff --git a/src/utils/message-transform.ts b/src/utils/message-transform.ts

new file mode 100644 (file)

index 0000000..424d152
--- /dev/null
+++ b/src/utils/message-transform.ts
@@ -0,0 +1,282 @@
+/**
+ * Generic utilities for per-part message transformations
+ *
+ * Both PII masking and secrets masking need to:
+ * 1. Iterate over messages and their content parts
+ * 2. Apply transformations based on per-part detection data
+ * 3. Handle string vs array content uniformly
+ *
+ * This module provides shared infrastructure to avoid duplication.
+ */
+
+import type { ChatMessage } from "../services/llm-client";
+import type { Span } from "./conflict-resolver";
+import type { ContentPart } from "./content";
+import { findPartialPlaceholderStart } from "./placeholders";
+
+/**
+ * Generic context for placeholder-based transformations
+ * Used by both PII masking and secrets masking
+ *
+ * replaceWithPlaceholders fills `mapping` and `reverseMapping`,
+ * incrementAndGenerate advances `counters`, and restorePlaceholders
+ * reads `mapping` to put the original values back.
+ */
+export interface PlaceholderContext {
+  /** Maps placeholder -> original value (read when restoring text) */
+  mapping: Record<string, string>;
+  /** Maps original value -> placeholder, so a repeated value reuses its placeholder */
+  reverseMapping: Record<string, string>;
+  /** Last number handed out per type; the next placeholder of that type gets this value + 1 */
+  counters: Record<string, number>;
+}
+
+/**
+ * Result of masking text with placeholders
+ * Used by both PII masking and secrets masking
+ *
+ * Pairs the masked text with the context needed to reverse the masking.
+ */
+export interface MaskResult {
+  /** Text with sensitive data replaced by placeholders */
+  masked: string;
+  /** Context for unmasking (maps placeholders to original values) */
+  context: PlaceholderContext;
+}
+
+/**
+ * Builds an empty placeholder context
+ *
+ * @returns A fresh context with no mappings and no counters
+ */
+export function createPlaceholderContext(): PlaceholderContext {
+  const emptyContext: PlaceholderContext = { mapping: {}, reverseMapping: {}, counters: {} };
+  return emptyContext;
+}
+
+/**
+ * Advances the per-type counter and returns the placeholder for the new count
+ *
+ * Counters start at 1 and live on `context.counters`, so numbering continues
+ * across calls that share the same context.
+ *
+ * @param type - Counter key, also passed to `format`
+ * @param context - Context whose `counters` entry for `type` is incremented in place
+ * @param format - Builds the placeholder text from the type and the new count
+ * @returns The placeholder produced by `format`
+ */
+export function incrementAndGenerate(
+  type: string,
+  context: PlaceholderContext,
+  format: (type: string, count: number) => string,
+): string {
+  // Only trust own entries: `counters` is a plain object, so a type named like an
+  // inherited member (e.g. "constructor") would otherwise read a function, not a number.
+  const hasCounter = Object.prototype.hasOwnProperty.call(context.counters, type);
+  const count = (hasCounter ? context.counters[type] : 0) + 1;
+  context.counters[type] = count;
+  return format(type, count);
+}
+
+/**
+ * Applies a text transform to every message part that has detection data
+ *
+ * Content shapes are handled as follows:
+ * - string content uses the data at index 0 and is returned untouched when
+ *   that data is empty
+ * - array (multimodal) content is mapped part by part; only text parts with
+ *   non-empty data are transformed, all other parts pass through
+ * - any other content (null/undefined) is returned as-is
+ *
+ * @param messages - Chat messages to transform
+ * @param perPartData - Per-message, per-part data: data[msgIdx][partIdx]
+ * @param transform - Function to transform text using the part data
+ * @param context - Shared context passed to all transform calls
+ * @returns A new array of messages; the input messages are not mutated
+ */
+export function transformMessagesPerPart<TData, TContext>(
+  messages: ChatMessage[],
+  perPartData: TData[][][],
+  transform: (text: string, data: TData[], context: TContext) => string,
+  context: TContext,
+): ChatMessage[] {
+  return messages.map((message, messageIndex) => {
+    const { content } = message;
+    const dataForParts = perPartData[messageIndex] || [];
+
+    // Multimodal content: each part looks up its own data slot
+    if (Array.isArray(content)) {
+      const nextParts = content.map((part: ContentPart, partIndex: number) => {
+        if (part.type !== "text" || typeof part.text !== "string") return part;
+
+        const partItems = dataForParts[partIndex] || [];
+        if (partItems.length === 0) return part;
+
+        return { ...part, text: transform(part.text, partItems, context) };
+      });
+      return { ...message, content: nextParts };
+    }
+
+    // Null/undefined content has nothing to transform
+    if (typeof content !== "string") return message;
+
+    // Plain string content keeps its data in slot 0
+    const items = dataForParts[0] || [];
+    return items.length === 0
+      ? message
+      : { ...message, content: transform(content, items, context) };
+  });
+}
+
+/**
+ * Restores placeholders in text with original values
+ *
+ * Generic function used by both PII unmasking and secrets unmasking.
+ *
+ * All placeholders are substituted in a single left-to-right pass, so text
+ * that was just restored is never scanned again. An original value that
+ * itself contains placeholder-looking text therefore comes back verbatim
+ * instead of being expanded a second time.
+ *
+ * @param text - Text containing placeholders
+ * @param context - Context with placeholder mappings
+ * @param formatValue - Optional function to format restored values (e.g., add markers);
+ *   called once per known placeholder
+ * @returns Text with every known placeholder replaced; unknown placeholders are left unchanged
+ */
+export function restorePlaceholders(
+  text: string,
+  context: PlaceholderContext,
+  formatValue?: (original: string) => string,
+): string {
+  // Longest first so that, where one placeholder is a prefix of another,
+  // the regex alternation prefers the longer one
+  const placeholders = Object.keys(context.mapping)
+    .filter((placeholder) => placeholder.length > 0)
+    .sort((a, b) => b.length - a.length);
+
+  if (placeholders.length === 0) return text;
+
+  const replacements = new Map<string, string>();
+  for (const placeholder of placeholders) {
+    const originalValue = context.mapping[placeholder];
+    replacements.set(placeholder, formatValue ? formatValue(originalValue) : originalValue);
+  }
+
+  // Placeholders contain regex metacharacters such as "[" and must be matched literally
+  const pattern = new RegExp(
+    placeholders.map((placeholder) => placeholder.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|"),
+    "g",
+  );
+
+  // A replacer function keeps "$" sequences in restored values from being interpreted
+  return text.replace(pattern, (match) => replacements.get(match) ?? match);
+}
+
+/**
+ * Restores placeholders in every choice of a chat completion response
+ *
+ * Only string message content is rewritten; any other content value is
+ * carried over unchanged. The response, its choices and their messages are
+ * shallow-copied rather than mutated.
+ *
+ * @param response - The response object with choices
+ * @param context - Context with placeholder mappings
+ * @param formatValue - Optional function to format restored values
+ * @returns A copy of the response with restored message content
+ */
+export function restoreResponsePlaceholders<
+  T extends { choices: Array<{ message: { content: unknown } }> },
+>(response: T, context: PlaceholderContext, formatValue?: (original: string) => string): T {
+  const choices = response.choices.map((choice) => {
+    const { content } = choice.message;
+    const restored =
+      typeof content === "string" ? restorePlaceholders(content, context, formatValue) : content;
+
+    return { ...choice, message: { ...choice.message, content: restored } };
+  });
+
+  return { ...response, choices } as T;
+}
+
+/**
+ * Replaces items in text with placeholders
+ *
+ * Generic function used by both PII masking and secrets masking.
+ * Handles: conflict resolution, placeholder assignment, and replacement.
+ *
+ * Placeholders are assigned in order of appearance. A value that already has
+ * a placeholder in `context.reverseMapping` reuses it; otherwise a new one is
+ * generated and recorded in both `context.mapping` and `context.reverseMapping`.
+ *
+ * @param text - Text to process
+ * @param items - Items with start/end positions to replace
+ * @param context - Placeholder context for tracking mappings (mutated in place)
+ * @param getType - Function to get the type string from an item
+ * @param generatePlaceholder - Function to generate placeholder for a type
+ * @param resolveConflicts - Function to resolve overlapping items
+ * @returns The text with every resolved item replaced by its placeholder
+ */
+export function replaceWithPlaceholders<T extends Span>(
+  text: string,
+  items: T[],
+  context: PlaceholderContext,
+  getType: (item: T) => string,
+  generatePlaceholder: (type: string, context: PlaceholderContext) => string,
+  resolveConflicts: (items: T[]) => T[],
+): string {
+  if (items.length === 0) {
+    return text;
+  }
+
+  // Resolve conflicts, then walk the survivors left to right
+  const ordered = [...resolveConflicts(items)].sort((a, b) => a.start - b.start);
+
+  const pieces: string[] = [];
+  let cursor = 0;
+
+  for (const item of ordered) {
+    const originalValue = text.slice(item.start, item.end);
+
+    // `reverseMapping` is a plain object keyed by user text. Look at own entries only,
+    // otherwise a value such as "constructor" or "toString" would resolve to an
+    // inherited member instead of a placeholder string.
+    let placeholder: string | undefined = Object.prototype.hasOwnProperty.call(
+      context.reverseMapping,
+      originalValue,
+    )
+      ? context.reverseMapping[originalValue]
+      : undefined;
+
+    if (!placeholder) {
+      placeholder = generatePlaceholder(getType(item), context);
+      context.mapping[placeholder] = originalValue;
+      context.reverseMapping[originalValue] = placeholder;
+    }
+
+    // Copy the untouched text before this item, then the placeholder itself.
+    // slice() returns "" if the resolver left an overlap (cursor > item.start).
+    pieces.push(text.slice(cursor, item.start), placeholder);
+
+    // Never move backwards, so overlapping spans cannot re-emit masked text
+    if (item.end > cursor) {
+      cursor = item.end;
+    }
+  }
+
+  pieces.push(text.slice(cursor));
+  return pieces.join("");
+}
+
+/**
+ * Restores placeholders in a stream chunk while holding back an unfinished one
+ *
+ * The previous buffer and the new chunk are joined. If the joined text ends in
+ * a partial placeholder (as reported by findPartialPlaceholderStart), that tail
+ * is returned as the new buffer and only the text before it is restored.
+ * Otherwise the whole text is restored and the buffer is emptied.
+ *
+ * @param buffer - Previous buffer content
+ * @param newChunk - New chunk to process
+ * @param context - Placeholder context
+ * @param restore - Function to restore placeholders in text
+ * @returns The restored output plus whatever must be carried into the next call
+ */
+export function processStreamChunk(
+  buffer: string,
+  newChunk: string,
+  context: PlaceholderContext,
+  restore: (text: string, ctx: PlaceholderContext) => string,
+): { output: string; remainingBuffer: string } {
+  const pending = buffer + newChunk;
+  const cut = findPartialPlaceholderStart(pending);
+  const hasPartial = cut !== -1;
+
+  // Everything before the partial placeholder (or all of it) is safe to restore now
+  const ready = hasPartial ? pending.slice(0, cut) : pending;
+  const held = hasPartial ? pending.slice(cut) : "";
+
+  return {
+    output: restore(ready, context),
+    remainingBuffer: held,
+  };
+}
+
+/**
+ * Emits whatever is still buffered when the stream ends
+ *
+ * An empty buffer yields an empty string without calling `restore`.
+ *
+ * @param buffer - Remaining buffer content
+ * @param context - Placeholder context
+ * @param restore - Function to restore placeholders in text
+ * @returns The restored buffer content, or "" when nothing was buffered
+ */
+export function flushBuffer(
+  buffer: string,
+  context: PlaceholderContext,
+  restore: (text: string, ctx: PlaceholderContext) => string,
+): string {
+  return buffer ? restore(buffer, context) : "";
+}
diff --git a/src/constants/placeholders.test.ts b/src/utils/placeholders.test.ts

similarity index 93%

rename from src/constants/placeholders.test.ts

rename to src/utils/placeholders.test.ts

index 48a3938841f736c77986e7e0e3f32aeb05c96dfa..39d4bb965e99158ab5670427fc6c2801a83cb7f2 100644 (file)
--- a/src/constants/placeholders.test.ts
+++ b/src/utils/placeholders.test.ts
@@ -23,7 +23,7 @@ describe("placeholder constants", () => {
    test("secret format uses correct delimiters", () => {
      expect(SECRET_PLACEHOLDER_FORMAT).toContain(PLACEHOLDER_DELIMITERS.start);
      expect(SECRET_PLACEHOLDER_FORMAT).toContain(PLACEHOLDER_DELIMITERS.end);
-    expect(SECRET_PLACEHOLDER_FORMAT).toBe("[[SECRET_REDACTED_{N}]]");
+    expect(SECRET_PLACEHOLDER_FORMAT).toBe("[[SECRET_MASKED_{N}]]");
    });
  });
  
@@ -42,12 +42,12 @@ describe("generatePlaceholder", () => {
  describe("generateSecretPlaceholder", () => {
    test("generates secret placeholder", () => {
      const result = generateSecretPlaceholder("API_KEY_OPENAI", 1);
-    expect(result).toBe("[[SECRET_REDACTED_API_KEY_OPENAI_1]]");
+    expect(result).toBe("[[SECRET_MASKED_API_KEY_OPENAI_1]]");
    });
  
    test("generates secret placeholder with different type and count", () => {
      const result = generateSecretPlaceholder("PEM_PRIVATE_KEY", 2);
-    expect(result).toBe("[[SECRET_REDACTED_PEM_PRIVATE_KEY_2]]");
+    expect(result).toBe("[[SECRET_MASKED_PEM_PRIVATE_KEY_2]]");
    });
  });
  
diff --git a/src/constants/placeholders.ts b/src/utils/placeholders.ts

similarity index 87%

rename from src/constants/placeholders.ts

rename to src/utils/placeholders.ts

index 91bedf3f18d85b54c6e3d2c9a30c0e5c68689af1..708c84ba9ee7051d1643769c010869cf6fb940f3 100644 (file)
--- a/src/constants/placeholders.ts
+++ b/src/utils/placeholders.ts
@@ -1,5 +1,5 @@
  /**
- * Placeholder constants for PII masking and secrets redaction
+ * Placeholder constants for PII masking and secrets masking
   * Single source of truth for all placeholder-related logic
   */
  
@@ -11,8 +11,8 @@ export const PLACEHOLDER_DELIMITERS = {
  /** PII placeholder format: [[TYPE_N]] e.g. [[PERSON_1]], [[EMAIL_ADDRESS_2]] */
  export const PII_PLACEHOLDER_FORMAT = "[[{TYPE}_{N}]]";
  
-/** Secrets placeholder format: [[SECRET_REDACTED_TYPE_N]] e.g. [[SECRET_REDACTED_API_KEY_OPENAI_1]] */
-export const SECRET_PLACEHOLDER_FORMAT = "[[SECRET_REDACTED_{N}]]";
+/** Secrets placeholder format: [[SECRET_MASKED_TYPE_N]] e.g. [[SECRET_MASKED_API_KEY_OPENAI_1]] */
+export const SECRET_PLACEHOLDER_FORMAT = "[[SECRET_MASKED_{N}]]";
  
  /**
   * Generates a placeholder string from the format
author	Stefan Gasser <redacted>
	Sat, 17 Jan 2026 19:32:54 +0000 (20:32 +0100)
committer	GitHub <redacted>
	Sat, 17 Jan 2026 19:32:54 +0000 (20:32 +0100)
config.example.yaml		patch \| blob \| history
docs/api-reference/chat-completions.mdx		patch \| blob \| history
docs/concepts/secrets-detection.mdx		patch \| blob \| history
docs/configuration/secrets-detection.mdx		patch \| blob \| history
src/config.ts		patch \| blob \| history
src/constants/languages.ts	[new file with mode: 0644]	patch \| blob
src/index.ts		patch \| blob \| history
src/pii/detect.test.ts	[moved from src/services/pii-detector.test.ts with 79% similarity]	patch \| blob \| history
src/pii/detect.ts	[moved from src/services/pii-detector.ts with 75% similarity]	patch \| blob \| history
src/pii/mask.test.ts	[new file with mode: 0644]	patch \| blob
src/pii/mask.ts	[new file with mode: 0644]	patch \| blob
src/routes/info.ts		patch \| blob \| history
src/routes/proxy.ts		patch \| blob \| history
src/secrets/detect.test.ts		patch \| blob \| history
src/secrets/detect.ts		patch \| blob \| history
src/secrets/mask.test.ts	[new file with mode: 0644]	patch \| blob
src/secrets/mask.ts	[new file with mode: 0644]	patch \| blob
src/secrets/multimodal.test.ts		patch \| blob \| history
src/secrets/patterns/api-keys.ts		patch \| blob \| history
src/secrets/patterns/env-vars.ts		patch \| blob \| history
src/secrets/patterns/private-keys.ts		patch \| blob \| history
src/secrets/patterns/tokens.ts		patch \| blob \| history
src/secrets/patterns/types.ts		patch \| blob \| history
src/secrets/patterns/utils.ts		patch \| blob \| history
src/secrets/redact.test.ts	[deleted file]	patch \| blob \| history
src/secrets/redact.ts	[deleted file]	patch \| blob \| history
src/services/decision.test.ts		patch \| blob \| history
src/services/decision.ts		patch \| blob \| history
src/services/language-detector.ts		patch \| blob \| history
src/services/masking.test.ts	[deleted file]	patch \| blob \| history
src/services/masking.ts	[deleted file]	patch \| blob \| history
src/services/stream-transformer.test.ts		patch \| blob \| history
src/services/stream-transformer.ts		patch \| blob \| history
src/test-utils/detection-results.ts	[new file with mode: 0644]	patch \| blob
src/utils/conflict-resolver.ts		patch \| blob \| history
src/utils/content.test.ts		patch \| blob \| history
src/utils/content.ts		patch \| blob \| history
src/utils/message-transform.test.ts	[new file with mode: 0644]	patch \| blob
src/utils/message-transform.ts	[new file with mode: 0644]	patch \| blob
src/utils/placeholders.test.ts	[moved from src/constants/placeholders.test.ts with 93% similarity]	patch \| blob \| history
src/utils/placeholders.ts	[moved from src/constants/placeholders.ts with 87% similarity]	patch \| blob \| history