enabled: true
# Action to take when secrets are detected:
- # redact: Replace secrets with placeholders, unmask in response (default)
+ # mask: Replace secrets with placeholders, unmask in response (default)
# block: Block the request with HTTP 400
# route_local: Route to local provider (only works in route mode)
- action: redact
+ action: mask
# Secret types to detect
# Private Keys (enabled by default):
| `X-PasteGuard-Language-Fallback` | `true` if configured language was not available |
| `X-PasteGuard-Secrets-Detected` | `true` if secrets were found |
| `X-PasteGuard-Secrets-Types` | Comma-separated list of detected secret types |
-| `X-PasteGuard-Secrets-Redacted` | `true` if secrets were redacted |
+| `X-PasteGuard-Secrets-Masked` | `true` if secrets were masked |
# Secrets Detection
-PasteGuard detects secrets before PII detection and can block, redact, or route requests containing sensitive credentials.
+PasteGuard detects secrets before PII detection and can block, mask, or route requests containing sensitive credentials.
## Supported Secret Types
| Action | Description |
|--------|-------------|
-| `redact` | Replace secrets with placeholders, restore in response (default) |
+| `mask` | Replace secrets with placeholders, restore in response (default) |
| `block` | Return HTTP 400, request never reaches LLM |
| `route_local` | Route to local LLM (requires route mode) |
-### Redact (Default)
+### Mask (Default)
```yaml
secrets_detection:
- action: redact
+ action: mask
```
Secrets are replaced with placeholders and restored in the response (like PII masking).
X-PasteGuard-Secrets-Types: OPENSSH_PRIVATE_KEY,API_KEY_OPENAI
```
-If secrets were redacted:
+If secrets were masked:
```
-X-PasteGuard-Secrets-Redacted: true
+X-PasteGuard-Secrets-Masked: true
```
```yaml
secrets_detection:
enabled: true
- action: redact
+ action: mask
entities:
- OPENSSH_PRIVATE_KEY
- PEM_PRIVATE_KEY
| Option | Default | Description |
|--------|---------|-------------|
| `enabled` | `true` | Enable secrets detection |
-| `action` | `redact` | Action when secrets found |
+| `action` | `mask` | Action when secrets found |
| `entities` | Private keys | Secret types to detect |
| `max_scan_chars` | `200000` | Max characters to scan (0 = unlimited) |
| `log_detected_types` | `true` | Log detected types (never logs content) |
| Action | Description |
|--------|-------------|
-| `redact` | Replace secrets with placeholders, restore in response (default) |
+| `mask` | Replace secrets with placeholders, restore in response (default) |
| `block` | Return HTTP 400, request never reaches LLM |
| `route_local` | Route to local LLM (requires route mode) |
-### Redact (Default)
+### Mask (Default)
```yaml
secrets_detection:
- action: redact
+ action: mask
```
### Block
import { existsSync, readFileSync, statSync } from "node:fs";
import { parse as parseYaml } from "yaml";
import { z } from "zod";
+import { SUPPORTED_LANGUAGES } from "./constants/languages";
// Schema definitions
marker_text: z.string().default("[protected]"),
});
-// All 25 spaCy languages with trained pipelines
-// See docker/presidio/languages.yaml for full list
-const SupportedLanguages = [
- "ca", // Catalan
- "zh", // Chinese
- "hr", // Croatian
- "da", // Danish
- "nl", // Dutch
- "en", // English
- "fi", // Finnish
- "fr", // French
- "de", // German
- "el", // Greek
- "it", // Italian
- "ja", // Japanese
- "ko", // Korean
- "lt", // Lithuanian
- "mk", // Macedonian
- "nb", // Norwegian
- "pl", // Polish
- "pt", // Portuguese
- "ro", // Romanian
- "ru", // Russian
- "sl", // Slovenian
- "es", // Spanish
- "sv", // Swedish
- "uk", // Ukrainian
-] as const;
-
-const LanguageEnum = z.enum(SupportedLanguages);
+const LanguageEnum = z.enum(SUPPORTED_LANGUAGES);
// Accept either array or comma-separated string for languages
// This allows using env vars like PASTEGUARD_LANGUAGES=en,de,fr
.union([z.array(LanguageEnum), z.string()])
.transform((val) => {
if (Array.isArray(val)) return val;
- return val.split(",").map((s) => s.trim()) as (typeof SupportedLanguages)[number][];
+ return val.split(",").map((s) => s.trim()) as (typeof SUPPORTED_LANGUAGES)[number][];
})
.pipe(z.array(LanguageEnum))
.default(["en"]);
const SecretsDetectionSchema = z.object({
enabled: z.boolean().default(true),
- action: z.enum(["block", "redact", "route_local"]).default("redact"),
+ action: z.enum(["block", "mask", "route_local"]).default("mask"),
entities: z.array(z.enum(SecretEntityTypes)).default(["OPENSSH_PRIVATE_KEY", "PEM_PRIVATE_KEY"]),
max_scan_chars: z.coerce.number().int().min(0).default(200000),
log_detected_types: z.boolean().default(true),
},
{
message:
- "secrets_detection.action 'route_local' is not compatible with mode 'mask'. Use mode 'route' or change secrets_detection.action to 'block' or 'redact'",
+ "secrets_detection.action 'route_local' is not compatible with mode 'mask'. Use mode 'route' or change secrets_detection.action to 'block' or 'mask'",
},
);
--- /dev/null
+/**
+ * All 24 spaCy languages with trained pipelines
+ * See docker/presidio/languages.yaml for full list
+ *
+ * Entries are two-letter language codes, listed alphabetically by their
+ * English language name (see the trailing comment on each entry).
+ * `as const` keeps every code as a string-literal type, which is what lets
+ * the `SupportedLanguage` union be derived from this tuple.
+ */
+export const SUPPORTED_LANGUAGES = [
+ "ca", // Catalan
+ "zh", // Chinese
+ "hr", // Croatian
+ "da", // Danish
+ "nl", // Dutch
+ "en", // English
+ "fi", // Finnish
+ "fr", // French
+ "de", // German
+ "el", // Greek
+ "it", // Italian
+ "ja", // Japanese
+ "ko", // Korean
+ "lt", // Lithuanian
+ "mk", // Macedonian
+ "nb", // Norwegian
+ "pl", // Polish
+ "pt", // Portuguese
+ "ro", // Romanian
+ "ru", // Russian
+ "sl", // Slovenian
+ "es", // Spanish
+ "sv", // Swedish
+ "uk", // Ukrainian
+] as const;
+
+/** Union of the language codes listed in `SUPPORTED_LANGUAGES`. */
+export type SupportedLanguage = (typeof SUPPORTED_LANGUAGES)[number];
import { HTTPException } from "hono/http-exception";
import { logger } from "hono/logger";
import { getConfig } from "./config";
+import { getPIIDetector } from "./pii/detect";
import { dashboardRoutes } from "./routes/dashboard";
import { healthRoutes } from "./routes/health";
import { infoRoutes } from "./routes/info";
import { proxyRoutes } from "./routes/proxy";
import { getLogger } from "./services/logger";
-import { getPIIDetector } from "./services/pii-detector";
type Variables = {
requestId: string;
if (config.secrets_detection.action === "route_local" && config.mode === "mask") {
console.error("\n❌ Configuration error detected!\n");
console.error(" secrets_detection.action 'route_local' is not compatible with mode 'mask'.");
- console.error(
- " Use mode 'route' or change secrets_detection.action to 'block' or 'redact'.\n",
- );
+ console.error(" Use mode 'route' or change secrets_detection.action to 'block' or 'mask'.\n");
console.error("[STARTUP] ✗ Invalid configuration. Exiting for safety.");
process.exit(1);
}
import { afterEach, describe, expect, mock, test } from "bun:test";
-import { PIIDetector } from "./pii-detector";
+import { PIIDetector } from "./detect";
const originalFetch = globalThis.fetch;
const result = await detector.analyzeMessages(messages);
expect(result.hasPII).toBe(true);
- expect(result.entitiesByMessage).toHaveLength(3);
- expect(result.entitiesByMessage[0]).toHaveLength(1);
- expect(result.entitiesByMessage[1]).toHaveLength(1);
- expect(result.entitiesByMessage[2]).toHaveLength(1);
+ // Per-message, per-part: messageEntities[msgIdx][partIdx] = entities
+ expect(result.messageEntities).toHaveLength(3);
+ // Each message has 1 part (string content)
+ expect(result.messageEntities[0]).toHaveLength(1);
+ expect(result.messageEntities[1]).toHaveLength(1);
+ expect(result.messageEntities[2]).toHaveLength(1);
+ // Each part has 1 entity
+ expect(result.messageEntities[0][0]).toHaveLength(1);
+ expect(result.messageEntities[1][0]).toHaveLength(1);
+ expect(result.messageEntities[2][0]).toHaveLength(1);
});
test("detects PII in system message when user message has none", async () => {
const result = await detector.analyzeMessages(messages);
expect(result.hasPII).toBe(true);
- expect(result.entitiesByMessage[0]).toHaveLength(1);
- expect(result.entitiesByMessage[0][0].entity_type).toBe("PERSON");
+ expect(result.messageEntities[0][0]).toHaveLength(1);
+ expect(result.messageEntities[0][0][0].entity_type).toBe("PERSON");
});
test("detects PII in earlier user message", async () => {
const result = await detector.analyzeMessages(messages);
expect(result.hasPII).toBe(true);
- expect(result.entitiesByMessage[0]).toHaveLength(1);
+ expect(result.messageEntities[0][0]).toHaveLength(1);
});
test("returns empty result for no messages", async () => {
const result = await detector.analyzeMessages([]);
expect(result.hasPII).toBe(false);
- expect(result.entitiesByMessage).toHaveLength(0);
- expect(result.newEntities).toHaveLength(0);
+ expect(result.messageEntities).toHaveLength(0);
+ expect(result.allEntities).toHaveLength(0);
});
test("handles multimodal content", async () => {
const result = await detector.analyzeMessages(messages);
expect(result.hasPII).toBe(true);
- expect(result.entitiesByMessage[0]).toHaveLength(1);
+ // Multimodal message has 2 parts
+ expect(result.messageEntities[0]).toHaveLength(2);
+ // First part (text) has 1 entity
+ expect(result.messageEntities[0][0]).toHaveLength(1);
+ // Second part (image) has no entities
+ expect(result.messageEntities[0][1]).toHaveLength(0);
});
test("skips messages with empty content", async () => {
const result = await detector.analyzeMessages(messages);
- expect(result.entitiesByMessage).toHaveLength(2);
- expect(result.entitiesByMessage[0]).toHaveLength(0);
+ expect(result.messageEntities).toHaveLength(2);
+ // First message (empty string) has 1 part with no entities
+ expect(result.messageEntities[0]).toHaveLength(1);
+ expect(result.messageEntities[0][0]).toHaveLength(0);
});
});
import { getConfig } from "../config";
+import { getLanguageDetector, type SupportedLanguage } from "../services/language-detector";
import { extractTextContent, type MessageContent } from "../utils/content";
-import { getLanguageDetector, type SupportedLanguage } from "./language-detector";
export interface PIIEntity {
entity_type: string;
score_threshold?: number;
}
+/**
+ * Per-message, per-part PII detection result
+ * Structure: messageEntities[msgIdx][partIdx] = entities for that part
+ */
export interface PIIDetectionResult {
hasPII: boolean;
- entitiesByMessage: PIIEntity[][];
- newEntities: PIIEntity[];
+ /** Per-message, per-part entities */
+ messageEntities: PIIEntity[][][];
+ /** Flattened list of all entities (for summary/logging) */
+ allEntities: PIIEntity[];
scanTimeMs: number;
language: SupportedLanguage;
languageFallback: boolean;
}
}
+ /**
+ * Analyzes messages for PII with per-part granularity
+ *
+ * For string content, entities are in messageEntities[msgIdx][0].
+ * For array content (multimodal), each text part is scanned separately.
+ */
async analyzeMessages(
messages: Array<{ role: string; content: MessageContent }>,
): Promise<PIIDetectionResult> {
const startTime = Date.now();
const config = getConfig();
+ // Detect language from the last user message
const lastUserMsg = messages.findLast((m) => m.role === "user");
const langText = lastUserMsg ? extractTextContent(lastUserMsg.content) : "";
const langResult = langText
? getLanguageDetector().detect(langText)
: { language: config.pii_detection.fallback_language, usedFallback: true };
- const scannedRoles = ["system", "developer", "user", "assistant"];
+ const scannedRoles = ["system", "developer", "user", "assistant", "tool"];
- const entitiesByMessage = await Promise.all(
- messages.map((message) => {
- const text = extractTextContent(message.content);
- return text && scannedRoles.includes(message.role)
- ? this.detectPII(text, langResult.language)
- : Promise.resolve([]);
+ // Detect PII per message, per content part
+ const messageEntities: PIIEntity[][][] = await Promise.all(
+ messages.map(async (message) => {
+ if (!scannedRoles.includes(message.role)) {
+ return [];
+ }
+
+ // String content → wrap in single-element array
+ if (typeof message.content === "string") {
+ const entities = message.content
+ ? await this.detectPII(message.content, langResult.language)
+ : [];
+ return [entities];
+ }
+
+ // Array content (multimodal) → per-part detection
+ if (Array.isArray(message.content)) {
+ return await Promise.all(
+ message.content.map(async (part) => {
+ if (part.type === "text" && typeof part.text === "string") {
+ return await this.detectPII(part.text, langResult.language);
+ }
+ return [];
+ }),
+ );
+ }
+
+ // Null/undefined content
+ return [];
}),
);
+ const allEntities = messageEntities.flat(2);
+
return {
- hasPII: entitiesByMessage.some((e) => e.length > 0),
- entitiesByMessage,
- newEntities: entitiesByMessage.flat(),
+ hasPII: allEntities.length > 0,
+ messageEntities,
+ allEntities,
scanTimeMs: Date.now() - startTime,
language: langResult.language,
languageFallback: langResult.usedFallback,
--- /dev/null
+import { describe, expect, test } from "bun:test";
+import type { MaskingConfig } from "../config";
+import type { ChatMessage } from "../services/llm-client";
+import { createPIIResult } from "../test-utils/detection-results";
+import type { PIIEntity } from "./detect";
+import {
+ createMaskingContext,
+ flushMaskingBuffer,
+ mask,
+ maskMessages,
+ unmask,
+ unmaskResponse,
+ unmaskStreamChunk,
+} from "./mask";
+
+// Baseline masking config: restored values are emitted without a marker prefix
+const defaultConfig: MaskingConfig = {
+  marker_text: "[protected]",
+  show_markers: false,
+};
+
+// Same marker text as the baseline, but with markers switched on
+const configWithMarkers: MaskingConfig = {
+  ...defaultConfig,
+  show_markers: true,
+};
+
+// Placeholder shape: [[<ENTITY_TYPE>_<counter>]], with one counter per entity type
+describe("PII placeholder format", () => {
+  test("uses [[TYPE_N]] format", () => {
+    const emailSpan: PIIEntity = { entity_type: "EMAIL_ADDRESS", start: 0, end: 16, score: 1.0 };
+
+    const { masked } = mask("john@example.com", [emailSpan]);
+
+    expect(masked).toBe("[[EMAIL_ADDRESS_1]]");
+  });
+
+  test("increments counter per entity type", () => {
+    const firstEmail: PIIEntity = { entity_type: "EMAIL_ADDRESS", start: 0, end: 7, score: 1.0 };
+    const secondEmail: PIIEntity = { entity_type: "EMAIL_ADDRESS", start: 12, end: 19, score: 1.0 };
+
+    const { masked } = mask("a@b.com and c@d.com", [firstEmail, secondEmail]);
+
+    expect(masked).toBe("[[EMAIL_ADDRESS_1]] and [[EMAIL_ADDRESS_2]]");
+  });
+
+  test("tracks different entity types separately", () => {
+    const person: PIIEntity = { entity_type: "PERSON", start: 0, end: 11, score: 0.9 };
+    const email: PIIEntity = { entity_type: "EMAIL_ADDRESS", start: 13, end: 26, score: 1.0 };
+
+    const { masked } = mask("Hans Müller: hans@firma.de", [person, email]);
+
+    expect(masked).toBe("[[PERSON_1]]: [[EMAIL_ADDRESS_1]]");
+  });
+});
+
+// show_markers toggles whether marker_text is prepended to each restored value
+describe("marker feature", () => {
+  test("adds markers when show_markers is true", () => {
+    const ctx = createMaskingContext();
+    ctx.mapping["[[EMAIL_ADDRESS_1]]"] = "john@example.com";
+
+    const restored = unmask("Email: [[EMAIL_ADDRESS_1]]", ctx, configWithMarkers);
+
+    expect(restored).toBe("Email: [protected]john@example.com");
+  });
+
+  test("no markers when show_markers is false", () => {
+    const ctx = createMaskingContext();
+    ctx.mapping["[[EMAIL_ADDRESS_1]]"] = "john@example.com";
+
+    const restored = unmask("Email: [[EMAIL_ADDRESS_1]]", ctx, defaultConfig);
+
+    expect(restored).toBe("Email: john@example.com");
+  });
+
+  test("markers work with streaming", () => {
+    const ctx = createMaskingContext();
+    ctx.mapping["[[PERSON_1]]"] = "John Doe";
+
+    const chunkResult = unmaskStreamChunk("", "Hello [[PERSON_1]]!", ctx, configWithMarkers);
+
+    expect(chunkResult.output).toBe("Hello [protected]John Doe!");
+  });
+
+  test("markers work with response unmasking", () => {
+    const ctx = createMaskingContext();
+    ctx.mapping["[[PERSON_1]]"] = "John Doe";
+
+    const assistantReply = { role: "assistant" as const, content: "Hello [[PERSON_1]]" };
+    const completion = {
+      id: "test",
+      object: "chat.completion" as const,
+      created: 1234567890,
+      model: "gpt-4",
+      choices: [{ index: 0, message: assistantReply, finish_reason: "stop" as const }],
+    };
+
+    const unmasked = unmaskResponse(completion, ctx, configWithMarkers);
+
+    expect(unmasked.choices[0].message.content).toBe("Hello [protected]John Doe");
+  });
+});
+
+// Detection fixtures follow the messageEntities[msgIdx][partIdx] = entities layout
+describe("maskMessages with PIIDetectionResult", () => {
+  test("masks multiple messages using detection result", () => {
+    const conversation: ChatMessage[] = [
+      { role: "user", content: "My email is test@example.com" },
+      { role: "assistant", content: "Got it" },
+      { role: "user", content: "Also john@test.com" },
+    ];
+
+    // One entry per message; string content is represented as a single part
+    const piiResult = createPIIResult([
+      [[{ entity_type: "EMAIL_ADDRESS", start: 12, end: 28, score: 1.0 }]],
+      [[]],
+      [[{ entity_type: "EMAIL_ADDRESS", start: 5, end: 18, score: 1.0 }]],
+    ]);
+
+    const outcome = maskMessages(conversation, piiResult);
+    const maskedMessages = outcome.masked;
+    const mapping = outcome.context.mapping;
+
+    expect(maskedMessages[0].content).toBe("My email is [[EMAIL_ADDRESS_1]]");
+    expect(maskedMessages[1].content).toBe("Got it");
+    expect(maskedMessages[2].content).toBe("Also [[EMAIL_ADDRESS_2]]");
+    expect(mapping["[[EMAIL_ADDRESS_1]]"]).toBe("test@example.com");
+    expect(mapping["[[EMAIL_ADDRESS_2]]"]).toBe("john@test.com");
+  });
+
+  test("handles multimodal content", () => {
+    const conversation: ChatMessage[] = [
+      {
+        role: "user",
+        content: [
+          { type: "text", text: "Contact john@test.com" },
+          { type: "image_url", image_url: { url: "https://example.com/img.jpg" } },
+        ],
+      },
+    ];
+
+    // Two parts in the single message: the text part has one entity, the image part none
+    const piiResult = createPIIResult([
+      [[{ entity_type: "EMAIL_ADDRESS", start: 8, end: 21, score: 1.0 }], []],
+    ]);
+
+    const outcome = maskMessages(conversation, piiResult);
+
+    const parts = outcome.masked[0].content as Array<{ type: string; text?: string }>;
+    expect(parts[0].text).toBe("Contact [[EMAIL_ADDRESS_1]]");
+    expect(parts[1].type).toBe("image_url");
+  });
+});
+
+// A placeholder may be split across stream chunks; the incomplete tail is held back
+describe("streaming with PII placeholders", () => {
+  test("buffers partial [[TYPE placeholder", () => {
+    const ctx = createMaskingContext();
+    ctx.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
+
+    const chunkResult = unmaskStreamChunk("", "Hello [[EMAIL_ADD", ctx, defaultConfig);
+
+    expect(chunkResult.output).toBe("Hello ");
+    expect(chunkResult.remainingBuffer).toBe("[[EMAIL_ADD");
+  });
+
+  test("completes buffered placeholder across chunks", () => {
+    const ctx = createMaskingContext();
+    ctx.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
+
+    const pendingBuffer = "[[EMAIL_ADD";
+    const nextChunk = "RESS_1]] there";
+    const chunkResult = unmaskStreamChunk(pendingBuffer, nextChunk, ctx, defaultConfig);
+
+    expect(chunkResult.output).toBe("test@test.com there");
+    expect(chunkResult.remainingBuffer).toBe("");
+  });
+
+  test("flushes remaining buffer at end of stream", () => {
+    const ctx = createMaskingContext();
+    ctx.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
+
+    // The buffered text never became a full placeholder, so it comes back unchanged
+    const tail = flushMaskingBuffer("[[EMAIL_ADD", ctx, defaultConfig);
+
+    expect(tail).toBe("[[EMAIL_ADD");
+  });
+});
+
+// Exercises how mask() behaves when detected spans overlap or touch.
+// Spans are half-open [start, end) character offsets into the test text.
+describe("PII conflict resolution", () => {
+  test("handles overlapping entities with same start - keeps longer", () => {
+    const text = "Given Eric's feedback";
+    const entities: PIIEntity[] = [
+      // "Eric"
+      { entity_type: "PERSON", start: 6, end: 10, score: 0.85 },
+      // "Eric's"
+      { entity_type: "PERSON", start: 6, end: 12, score: 0.8 },
+    ];
+
+    const { masked, context } = mask(text, entities);
+
+    expect(masked).toBe("Given [[PERSON_1]] feedback");
+    expect(context.mapping["[[PERSON_1]]"]).toBe("Eric's");
+  });
+
+  test("handles partially overlapping entities of same type - merges them", () => {
+    const text = "Contact John Smith Jones please";
+    const entities: PIIEntity[] = [
+      // "John Smith"
+      { entity_type: "PERSON", start: 8, end: 18, score: 0.9 },
+      // "Smith Jones" - ends at 24 so the space before "please" is not part of the span
+      { entity_type: "PERSON", start: 13, end: 24, score: 0.7 },
+    ];
+
+    const { masked } = mask(text, entities);
+
+    // The merged span covers "John Smith Jones" only; surrounding whitespace is preserved
+    expect(masked).toBe("Contact [[PERSON_1]] please");
+  });
+
+  test("keeps adjacent non-overlapping entities", () => {
+    const text = "HansMüller";
+    const entities: PIIEntity[] = [
+      // "Hans"
+      { entity_type: "PERSON", start: 0, end: 4, score: 0.9 },
+      // "Müller" - starts exactly where the previous span ends
+      { entity_type: "PERSON", start: 4, end: 10, score: 0.9 },
+    ];
+
+    const { masked } = mask(text, entities);
+
+    expect(masked).toBe("[[PERSON_1]][[PERSON_2]]");
+  });
+});
+
+// Masks a text, simulates a model reply that echoes placeholders, then unmasks it
+describe("mask -> unmask roundtrip", () => {
+  test("preserves original data through roundtrip", () => {
+    const source = "Contact Hans Müller at hans@firma.de or call +49123456789";
+    const spans: PIIEntity[] = [
+      { entity_type: "PERSON", start: 8, end: 19, score: 0.9 },
+      { entity_type: "EMAIL_ADDRESS", start: 23, end: 36, score: 1.0 },
+      { entity_type: "PHONE_NUMBER", start: 45, end: 57, score: 0.95 },
+    ];
+
+    const { masked, context } = mask(source, spans);
+
+    // None of the raw values may survive in the masked text
+    expect(masked).not.toContain("Hans Müller");
+    expect(masked).not.toContain("hans@firma.de");
+    expect(masked).not.toContain("+49123456789");
+
+    // Build a reply from the placeholders actually present in the masked text
+    const personToken = masked.match(/\[\[PERSON_1\]\]/)?.[0];
+    const emailToken = masked.match(/\[\[EMAIL_ADDRESS_1\]\]/)?.[0];
+    const llmResponse = `I see ${personToken}, email ${emailToken}`;
+
+    const restored = unmask(llmResponse, context, defaultConfig);
+
+    expect(restored).toContain("Hans Müller");
+    expect(restored).toContain("hans@firma.de");
+  });
+});
+
+// Placeholders embedded in HTML markup are restored as plain text substitutions
+describe("HTML context handling", () => {
+  test("unmasks placeholders in HTML without encoding issues", () => {
+    const ctx = createMaskingContext();
+    ctx.mapping["[[PERSON_1]]"] = "Dr. Sarah Chen";
+    ctx.mapping["[[EMAIL_ADDRESS_1]]"] = "sarah.chen@hospital.org";
+
+    const markup = `<p>Contact [[PERSON_1]] at [[EMAIL_ADDRESS_1]]</p>`;
+    const restored = unmask(markup, ctx, defaultConfig);
+
+    expect(restored).toBe("<p>Contact Dr. Sarah Chen at sarah.chen@hospital.org</p>");
+  });
+
+  test("works with complex HTML structures", () => {
+    const ctx = createMaskingContext();
+    ctx.mapping["[[PERSON_1]]"] = "Dr. Sarah Chen";
+    ctx.mapping["[[EMAIL_ADDRESS_1]]"] = "sarah@hospital.org";
+
+    // The same placeholder appears both in an attribute value and in element text
+    const markup = `
+ <div class="profile">
+ <h1>[[PERSON_1]]</h1>
+ <a href="mailto:[[EMAIL_ADDRESS_1]]">[[EMAIL_ADDRESS_1]]</a>
+ </div>
+ `;
+
+    const restored = unmask(markup, ctx, defaultConfig);
+
+    expect(restored).toContain("Dr. Sarah Chen");
+    expect(restored).toContain("sarah@hospital.org");
+    expect(restored).not.toContain("[[");
+  });
+});
+
+// unmaskResponse restores placeholders in choice content and keeps other fields as-is
+describe("unmaskResponse", () => {
+  test("unmasks all choices in response", () => {
+    const ctx = createMaskingContext();
+    ctx.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
+    ctx.mapping["[[PERSON_1]]"] = "John Doe";
+
+    const assistantReply = {
+      role: "assistant" as const,
+      content: "Contact [[PERSON_1]] at [[EMAIL_ADDRESS_1]]",
+    };
+    const completion = {
+      id: "chatcmpl-123",
+      object: "chat.completion" as const,
+      created: 1234567890,
+      model: "gpt-4",
+      choices: [{ index: 0, message: assistantReply, finish_reason: "stop" as const }],
+      usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
+    };
+
+    const unmasked = unmaskResponse(completion, ctx, defaultConfig);
+
+    expect(unmasked.choices[0].message.content).toBe("Contact John Doe at test@test.com");
+    expect(unmasked.id).toBe("chatcmpl-123");
+    expect(unmasked.model).toBe("gpt-4");
+  });
+});
+
+// Non-ASCII text, empty input, and repeated values
+describe("edge cases", () => {
+  test("handles unicode in masked text", () => {
+    const source = "Kontakt: François Müller";
+    const nameSpan: PIIEntity = { entity_type: "PERSON", start: 9, end: 24, score: 0.9 };
+
+    const maskResult = mask(source, [nameSpan]);
+    expect(maskResult.masked).toBe("Kontakt: [[PERSON_1]]");
+
+    const restored = unmask(maskResult.masked, maskResult.context, defaultConfig);
+    expect(restored).toBe("Kontakt: François Müller");
+  });
+
+  test("handles empty text", () => {
+    const maskResult = mask("", []);
+
+    expect(maskResult.masked).toBe("");
+    expect(unmask("", maskResult.context, defaultConfig)).toBe("");
+  });
+
+  test("reuses placeholder for duplicate values", () => {
+    const source = "a@b.com and again a@b.com";
+    const firstHit: PIIEntity = { entity_type: "EMAIL_ADDRESS", start: 0, end: 7, score: 1.0 };
+    const secondHit: PIIEntity = { entity_type: "EMAIL_ADDRESS", start: 18, end: 25, score: 1.0 };
+
+    const { masked, context } = mask(source, [firstHit, secondHit]);
+
+    // Both occurrences share one placeholder, so the mapping holds a single entry
+    expect(masked).toBe("[[EMAIL_ADDRESS_1]] and again [[EMAIL_ADDRESS_1]]");
+    expect(Object.keys(context.mapping)).toHaveLength(1);
+  });
+});
--- /dev/null
+import type { MaskingConfig } from "../config";
+import type { ChatCompletionResponse, ChatMessage } from "../services/llm-client";
+import { resolveConflicts } from "../utils/conflict-resolver";
+import {
+ createPlaceholderContext,
+ flushBuffer,
+ incrementAndGenerate,
+ type MaskResult,
+ type PlaceholderContext,
+ processStreamChunk,
+ replaceWithPlaceholders,
+ restorePlaceholders,
+ restoreResponsePlaceholders,
+ transformMessagesPerPart,
+} from "../utils/message-transform";
+import {
+ generatePlaceholder as generatePlaceholderFromFormat,
+ PII_PLACEHOLDER_FORMAT,
+} from "../utils/placeholders";
+import type { PIIDetectionResult, PIIEntity } from "./detect";
+
+export type { MaskResult } from "../utils/message-transform";
+
+/**
+ * Builds the placeholder state used while masking PII for one request.
+ *
+ * @returns A new context obtained from `createPlaceholderContext()`.
+ */
+export function createMaskingContext(): PlaceholderContext {
+  const freshContext = createPlaceholderContext();
+  return freshContext;
+}
+
+/**
+ * Produces the placeholder string for one PII entity type.
+ *
+ * Counting is delegated to `incrementAndGenerate`; the text itself is built
+ * by the shared placeholder generator using `PII_PLACEHOLDER_FORMAT`.
+ */
+function generatePlaceholder(entityType: string, context: PlaceholderContext): string {
+  return incrementAndGenerate(entityType, context, (kind, ordinal) => {
+    return generatePlaceholderFromFormat(PII_PLACEHOLDER_FORMAT, kind, ordinal);
+  });
+}
+
+/**
+ * Derives the optional value formatter from the masking config.
+ *
+ * @returns `undefined` when markers are disabled; otherwise a function that
+ *          prefixes a restored value with `config.marker_text`.
+ */
+function getFormatValue(config: MaskingConfig): ((original: string) => string) | undefined {
+  if (!config.show_markers) {
+    return undefined;
+  }
+  return (original: string) => `${config.marker_text}${original}`;
+}
+
+/**
+ * Replaces the given PII entities in `text` with placeholders.
+ *
+ * @param text - Text to mask
+ * @param entities - Detected entities whose spans refer to `text`
+ * @param context - Existing placeholder context to reuse; a new one is
+ *                  created when omitted
+ * @returns The masked text together with the context that was used
+ */
+export function mask(
+  text: string,
+  entities: PIIEntity[],
+  context?: PlaceholderContext,
+): MaskResult {
+  const activeContext = context ?? createMaskingContext();
+  const entityTypeOf = (entity: PIIEntity): string => entity.entity_type;
+
+  const maskedText = replaceWithPlaceholders(
+    text,
+    entities,
+    activeContext,
+    entityTypeOf,
+    generatePlaceholder,
+    resolveConflicts,
+  );
+
+  return { masked: maskedText, context: activeContext };
+}
+
+/**
+ * Restores original values in `text` for every placeholder known to `context`.
+ *
+ * When `config.show_markers` is set, each restored value is passed through
+ * the marker formatter so protected content is visibly flagged.
+ */
+export function unmask(text: string, context: PlaceholderContext, config: MaskingConfig): string {
+  const formatValue = getFormatValue(config);
+  return restorePlaceholders(text, context, formatValue);
+}
+
+/**
+ * Masks every message using the per-part entities from a detection result.
+ *
+ * A single context is created here and handed to `transformMessagesPerPart`,
+ * which invokes the callback with each part's text and entities.
+ *
+ * @returns The masked messages and the context needed to unmask later
+ */
+export function maskMessages(
+  messages: ChatMessage[],
+  detection: PIIDetectionResult,
+): { masked: ChatMessage[]; context: PlaceholderContext } {
+  const sharedContext = createMaskingContext();
+
+  const maskedMessages = transformMessagesPerPart(
+    messages,
+    detection.messageEntities,
+    (partText, partEntities, ctx) => {
+      const { masked: maskedPart } = mask(partText, partEntities, ctx);
+      return maskedPart;
+    },
+    sharedContext,
+  );
+
+  return { masked: maskedMessages, context: sharedContext };
+}
+
+/**
+ * Unmasks one chunk of a streamed response.
+ *
+ * Delegates to `processStreamChunk`, supplying `unmask` (bound to `config`)
+ * as the restore step.
+ *
+ * @returns `output` - the unmasked portion; `remainingBuffer` - text held
+ *          back because it might contain a partial placeholder
+ */
+export function unmaskStreamChunk(
+  buffer: string,
+  newChunk: string,
+  context: PlaceholderContext,
+  config: MaskingConfig,
+): { output: string; remainingBuffer: string } {
+  const restore = (text: string, ctx: PlaceholderContext): string => unmask(text, ctx, config);
+  return processStreamChunk(buffer, newChunk, context, restore);
+}
+
+/**
+ * Emits whatever is still buffered once the stream has ended.
+ *
+ * Delegates to `flushBuffer`, supplying `unmask` (bound to `config`) as the
+ * restore step.
+ */
+export function flushMaskingBuffer(
+  buffer: string,
+  context: PlaceholderContext,
+  config: MaskingConfig,
+): string {
+  const restore = (text: string, ctx: PlaceholderContext): string => unmask(text, ctx, config);
+  return flushBuffer(buffer, context, restore);
+}
+
+/**
+ * Restores placeholders in a chat completion response.
+ *
+ * Delegates to `restoreResponsePlaceholders`, passing the marker formatter
+ * derived from `config` (or `undefined` when markers are disabled).
+ */
+export function unmaskResponse(
+  response: ChatCompletionResponse,
+  context: PlaceholderContext,
+  config: MaskingConfig,
+): ChatCompletionResponse {
+  const formatValue = getFormatValue(config);
+  return restoreResponsePlaceholders(response, context, formatValue);
+}
import { Hono } from "hono";
import pkg from "../../package.json";
import { getConfig } from "../config";
+import { getPIIDetector } from "../pii/detect";
import { getRouter } from "../services/decision";
-import { getPIIDetector } from "../services/pii-detector";
export const infoRoutes = new Hono();
import { proxy } from "hono/proxy";
import { z } from "zod";
import { getConfig, type MaskingConfig } from "../config";
-import {
- detectSecrets,
- extractTextFromRequest,
- type SecretsDetectionResult,
-} from "../secrets/detect";
-import { type RedactionContext, redactSecrets, unredactResponse } from "../secrets/redact";
+import { unmaskResponse as unmaskPIIResponse } from "../pii/mask";
+import { detectSecretsInMessages, type MessageSecretsResult } from "../secrets/detect";
+import { maskMessages as maskSecretsMessages, unmaskSecretsResponse } from "../secrets/mask";
import { getRouter, type MaskDecision, type RoutingDecision } from "../services/decision";
import {
type ChatCompletionRequest,
type LLMResult,
} from "../services/llm-client";
import { logRequest, type RequestLogData } from "../services/logger";
-import { unmaskResponse } from "../services/masking";
import { createUnmaskingStream } from "../services/stream-transformer";
-import { type ContentPart, extractTextContent } from "../utils/content";
+import { extractTextContent } from "../utils/content";
+import type { PlaceholderContext } from "../utils/message-transform";
// Request validation schema
const ChatCompletionSchema = z
statusCode: number,
errorMessage: string,
decision?: RoutingDecision,
- secretsResult?: SecretsDetectionResult,
+ secretsResult?: MessageSecretsResult,
maskedContent?: string,
): RequestLogData {
const config = getConfig();
model: body.model || "unknown",
piiDetected: decision?.piiResult.hasPII ?? false,
entities: decision
- ? [...new Set(decision.piiResult.newEntities.map((e) => e.entity_type))]
+ ? [...new Set(decision.piiResult.allEntities.map((e) => e.entity_type))]
: [],
latencyMs: Date.now() - startTime,
scanTimeMs: decision?.piiResult.scanTimeMs ?? 0,
const router = getRouter();
// Track secrets detection state for response handling
- let secretsResult: SecretsDetectionResult | undefined;
- let redactionContext: RedactionContext | undefined;
- let secretsRedacted = false;
+ let secretsResult: MessageSecretsResult | undefined;
+ let secretsMaskingContext: PlaceholderContext | undefined;
+ let secretsMasked = false;
- // Secrets detection runs before PII detection
+ // Secrets detection runs before PII detection (per-part)
if (config.secrets_detection.enabled) {
- const text = extractTextFromRequest(body);
- secretsResult = detectSecrets(text, config.secrets_detection);
+ secretsResult = detectSecretsInMessages(body.messages, config.secrets_detection);
if (secretsResult.detected) {
const secretTypes = secretsResult.matches.map((m) => m.type);
// Block action - return 400 error
if (config.secrets_detection.action === "block") {
- // Set headers before returning error
c.header("X-PasteGuard-Secrets-Detected", "true");
c.header("X-PasteGuard-Secrets-Types", secretTypesStr);
- // Log metadata only (no secret content)
logRequest(
{
timestamp: new Date().toISOString(),
mode: config.mode,
- provider: "openai", // Note: Request never reached provider
+ provider: "openai",
model: body.model || "unknown",
piiDetected: false,
entities: [],
);
}
- // Redact action - replace secrets with placeholders and continue
- if (config.secrets_detection.action === "redact") {
- const redactedMessages = redactMessagesWithSecrets(body.messages, secretsResult);
- body = { ...body, messages: redactedMessages.messages };
- redactionContext = redactedMessages.context;
- secretsRedacted = true;
+ // Mask action - replace secrets with placeholders (per-part)
+ if (config.secrets_detection.action === "mask") {
+ const result = maskSecretsMessages(body.messages, secretsResult);
+ body = { ...body, messages: result.masked };
+ secretsMaskingContext = result.context;
+ secretsMasked = true;
}
// route_local action is handled in handleCompletion via secretsResult
startTime,
router,
secretsResult,
- redactionContext,
- secretsRedacted,
+ secretsMaskingContext,
+ secretsMasked,
);
},
);
-/**
- * Redacts secrets in all messages based on detection result
- * Returns redacted messages and the redaction context for unredaction
- */
-function redactMessagesWithSecrets(
- messages: ChatMessage[],
- secretsResult: SecretsDetectionResult,
-): { messages: ChatMessage[]; context: RedactionContext } {
- // Build a map of message content to redactions
- // Since we concatenated all messages with \n, we need to track positions per message
- let currentOffset = 0;
- const messagePositions: { start: number; end: number }[] = [];
-
- for (const msg of messages) {
- const text = extractTextContent(msg.content);
- const length = text.length;
- messagePositions.push({ start: currentOffset, end: currentOffset + length });
- currentOffset += length + 1; // +1 for \n separator
- }
-
- // Create redaction context
- let context: RedactionContext = {
- mapping: {},
- reverseMapping: {},
- counters: {},
- };
-
- // Apply redactions to each message
- const redactedMessages = messages.map((msg, i) => {
- // Handle null/undefined content
- if (!msg.content) {
- return msg;
- }
-
- // Handle array content (multimodal messages)
- if (Array.isArray(msg.content)) {
- const msgPos = messagePositions[i];
-
- // Filter redactions for this message
- const messageRedactions = (secretsResult.redactions || [])
- .filter((r) => r.start >= msgPos.start && r.end <= msgPos.end)
- .map((r) => ({
- ...r,
- start: r.start - msgPos.start,
- end: r.end - msgPos.start,
- }));
-
- if (messageRedactions.length === 0) {
- return msg;
- }
-
- // Track offset position within the concatenated text for this message
- // (matches how extractTextContent joins parts with \n)
- let partOffset = 0;
-
- // Redact only text parts of array content with proper offset tracking
- const redactedContent = msg.content.map((part: ContentPart) => {
- if (part.type === "text" && typeof part.text === "string") {
- const partLength = part.text.length;
-
- // Find redactions that apply to this specific part
- const partRedactions = messageRedactions
- .filter((r) => r.start < partOffset + partLength && r.end > partOffset)
- .map((r) => ({
- ...r,
- start: Math.max(0, r.start - partOffset),
- end: Math.min(partLength, r.end - partOffset),
- }));
-
- if (partRedactions.length > 0) {
- const { redacted, context: updatedContext } = redactSecrets(
- part.text,
- partRedactions,
- context,
- );
- context = updatedContext;
- partOffset += partLength + 1; // +1 for \n separator
- return { ...part, text: redacted };
- }
-
- partOffset += partLength + 1; // +1 for \n separator
- return part;
- }
- return part;
- });
-
- return { ...msg, content: redactedContent };
- }
-
- // Handle string content (text-only messages)
- if (typeof msg.content !== "string") {
- return msg;
- }
-
- const msgPos = messagePositions[i];
-
- // Filter redactions that fall within this message's position
- const messageRedactions = (secretsResult.redactions || [])
- .filter((r) => r.start >= msgPos.start && r.end <= msgPos.end)
- .map((r) => ({
- ...r,
- start: r.start - msgPos.start,
- end: r.end - msgPos.start,
- }));
-
- if (messageRedactions.length === 0) {
- return msg;
- }
-
- const { redacted, context: updatedContext } = redactSecrets(
- msg.content,
- messageRedactions,
- context,
- );
- context = updatedContext;
-
- return { ...msg, content: redacted };
- });
-
- return { messages: redactedMessages, context };
-}
-
/**
* Handle chat completion for both route and mask modes
*/
decision: RoutingDecision,
startTime: number,
router: ReturnType<typeof getRouter>,
- secretsResult?: SecretsDetectionResult,
- redactionContext?: RedactionContext,
- secretsRedacted?: boolean,
+ secretsResult?: MessageSecretsResult,
+ secretsMaskingContext?: PlaceholderContext,
+ secretsMasked?: boolean,
) {
const client = router.getClient(decision.provider);
const maskingConfig = router.getMaskingConfig();
c.header("X-PasteGuard-Secrets-Detected", "true");
c.header("X-PasteGuard-Secrets-Types", secretsTypes.join(","));
}
- if (secretsRedacted) {
- c.header("X-PasteGuard-Secrets-Redacted", "true");
+ if (secretsMasked) {
+ c.header("X-PasteGuard-Secrets-Masked", "true");
}
try {
maskingConfig,
secretsDetected,
secretsTypes,
- redactionContext,
+ secretsMaskingContext,
);
}
maskingConfig,
secretsDetected,
secretsTypes,
- redactionContext,
+ secretsMaskingContext,
);
} catch (error) {
console.error("LLM request error:", error);
maskingConfig: MaskingConfig,
secretsDetected?: boolean,
secretsTypes?: string[],
- redactionContext?: RedactionContext,
+ secretsMaskingContext?: PlaceholderContext,
) {
logRequest(
createLogData(
// Determine if we need to transform the stream
const needsPIIUnmasking = isMaskDecision(decision);
- const needsSecretsUnredaction = redactionContext !== undefined;
+ const needsSecretsUnmasking = secretsMaskingContext !== undefined;
- if (needsPIIUnmasking || needsSecretsUnredaction) {
+ if (needsPIIUnmasking || needsSecretsUnmasking) {
const unmaskingStream = createUnmaskingStream(
result.response,
needsPIIUnmasking ? decision.maskingContext : undefined,
maskingConfig,
- redactionContext,
+ secretsMaskingContext,
);
return c.body(unmaskingStream);
}
maskingConfig: MaskingConfig,
secretsDetected?: boolean,
secretsTypes?: string[],
- redactionContext?: RedactionContext,
+ secretsMaskingContext?: PlaceholderContext,
) {
logRequest(
createLogData(
// First unmask PII if needed
if (isMaskDecision(decision)) {
- response = unmaskResponse(response, decision.maskingContext, maskingConfig);
+ response = unmaskPIIResponse(response, decision.maskingContext, maskingConfig);
}
- // Then unredact secrets if needed
- if (redactionContext) {
- response = unredactResponse(response, redactionContext);
+ // Then unmask secrets if needed
+ if (secretsMaskingContext) {
+ response = unmaskSecretsResponse(response, secretsMaskingContext);
}
return c.json(response);
provider: decision.provider,
model: result.model,
piiDetected: decision.piiResult.hasPII,
- entities: [...new Set(decision.piiResult.newEntities.map((e) => e.entity_type))],
+ entities: [...new Set(decision.piiResult.allEntities.map((e) => e.entity_type))],
latencyMs: Date.now() - startTime,
scanTimeMs: decision.piiResult.scanTimeMs,
promptTokens: response?.usage?.prompt_tokens,
import { describe, expect, test } from "bun:test";
import type { SecretsDetectionConfig } from "../config";
-import type { ChatCompletionRequest } from "../services/llm-client";
-import { detectSecrets, extractTextFromRequest } from "./detect";
+import { detectSecrets } from "./detect";
const defaultConfig: SecretsDetectionConfig = {
enabled: true,
expect(result.matches).toHaveLength(1);
expect(result.matches[0].type).toBe("OPENSSH_PRIVATE_KEY");
expect(result.matches[0].count).toBe(1);
- expect(result.redactions).toBeDefined();
- expect(result.redactions?.length).toBe(1);
+ expect(result.locations).toBeDefined();
+ expect(result.locations?.length).toBe(1);
});
test("detects RSA private key", () => {
expect(result.matches).toHaveLength(1);
expect(result.matches[0].type).toBe("OPENSSH_PRIVATE_KEY");
expect(result.matches[0].count).toBe(2);
- expect(result.redactions?.length).toBe(2);
+ expect(result.locations?.length).toBe(2);
});
test("detects multiple secrets of different types", () => {
expect(result.matches[0].count).toBe(1); // Should be 1, not 2
});
- test("redactions are sorted by start position descending", () => {
+ test("locations are sorted by start position descending", () => {
const text = `${opensshKey}\n\n${rsaKey}`;
const result = detectSecrets(text, defaultConfig);
- expect(result.redactions).toBeDefined();
- if (result.redactions && result.redactions.length > 1) {
- for (let i = 0; i < result.redactions.length - 1; i++) {
- expect(result.redactions[i].start).toBeGreaterThan(result.redactions[i + 1].start);
+ expect(result.locations).toBeDefined();
+ if (result.locations && result.locations.length > 1) {
+ for (let i = 0; i < result.locations.length - 1; i++) {
+ expect(result.locations[i].start).toBeGreaterThan(result.locations[i + 1].start);
}
}
});
expect(result.matches).toHaveLength(1);
expect(result.matches[0].type).toBe("API_KEY_OPENAI");
expect(result.matches[0].count).toBe(1);
- expect(result.redactions).toBeDefined();
- expect(result.redactions?.[0].type).toBe("API_KEY_OPENAI");
+ expect(result.locations).toBeDefined();
+ expect(result.locations?.[0].type).toBe("API_KEY_OPENAI");
});
test("detects AWS access key", () => {
expect(result.detected).toBe(false);
});
- test("redaction positions are correct", () => {
+ test("location positions are correct", () => {
const text = "config: DB_PASSWORD=mysecretpassword123 here";
const result = detectSecrets(text, passwordConfig);
- expect(result.redactions).toBeDefined();
- expect(result.redactions?.length).toBe(1);
- const redacted = text.slice(result.redactions![0].start, result.redactions![0].end);
- expect(redacted).toBe("DB_PASSWORD=mysecretpassword123");
+ expect(result.locations).toBeDefined();
+ expect(result.locations?.length).toBe(1);
+ const matched = text.slice(result.locations![0].start, result.locations![0].end);
+ expect(matched).toBe("DB_PASSWORD=mysecretpassword123");
});
});
expect(result.detected).toBe(false);
});
- test("redaction positions are correct", () => {
+ test("location positions are correct", () => {
const text = "export APP_SECRET=mysupersecretvalue123 # comment";
const result = detectSecrets(text, secretConfig);
- expect(result.redactions).toBeDefined();
- expect(result.redactions?.length).toBe(1);
- const redacted = text.slice(result.redactions![0].start, result.redactions![0].end);
- expect(redacted).toBe("APP_SECRET=mysupersecretvalue123");
+ expect(result.locations).toBeDefined();
+ expect(result.locations?.length).toBe(1);
+ const matched = text.slice(result.locations![0].start, result.locations![0].end);
+ expect(matched).toBe("APP_SECRET=mysupersecretvalue123");
});
});
expect(result.detected).toBe(false);
});
- test("redaction covers full connection string", () => {
+ test("location covers full connection string", () => {
const text = "export DB=postgres://admin:secret123@db.example.com:5432/prod";
const result = detectSecrets(text, connConfig);
- expect(result.redactions).toBeDefined();
- expect(result.redactions?.length).toBe(1);
- const redacted = text.slice(result.redactions![0].start, result.redactions![0].end);
- expect(redacted).toBe("postgres://admin:secret123@db.example.com:5432/prod");
+ expect(result.locations).toBeDefined();
+ expect(result.locations?.length).toBe(1);
+ const matched = text.slice(result.locations![0].start, result.locations![0].end);
+ expect(matched).toBe("postgres://admin:secret123@db.example.com:5432/prod");
});
});
expect(result.matches.length).toBeGreaterThanOrEqual(4);
});
- test("redaction positions are correct for all types", () => {
+ test("location positions are correct for all types", () => {
const text = `Key: ${awsAccessKey} and ${githubToken}`;
const result = detectSecrets(text, allConfig);
- expect(result.redactions).toBeDefined();
- expect(result.redactions?.length).toBe(2);
+ expect(result.locations).toBeDefined();
+ expect(result.locations?.length).toBe(2);
- // Verify redactions point to correct positions
- for (const redaction of result.redactions || []) {
- const extracted = text.slice(redaction.start, redaction.end);
+ // Verify locations point to correct positions
+ for (const location of result.locations || []) {
+ const extracted = text.slice(location.start, location.end);
expect(extracted.length).toBeGreaterThan(10);
}
});
});
-
-describe("extractTextFromRequest", () => {
- test("extracts text from simple messages", () => {
- const request: ChatCompletionRequest = {
- messages: [
- { role: "user", content: "Hello world" },
- { role: "assistant", content: "Hi there" },
- ],
- };
- const text = extractTextFromRequest(request);
- expect(text).toBe("Hello world\nHi there");
- });
-
- test("extracts text from system messages", () => {
- const request: ChatCompletionRequest = {
- messages: [
- { role: "system", content: "You are helpful" },
- { role: "user", content: "Hello" },
- ],
- };
- const text = extractTextFromRequest(request);
- expect(text).toBe("You are helpful\nHello");
- });
-
- test("filters out empty messages", () => {
- const request: ChatCompletionRequest = {
- messages: [
- { role: "user", content: "Hello" },
- { role: "assistant", content: "" },
- { role: "user", content: "World" },
- ],
- };
- const text = extractTextFromRequest(request);
- expect(text).toBe("Hello\nWorld");
- });
-
- test("handles single message", () => {
- const request: ChatCompletionRequest = {
- messages: [{ role: "user", content: "Test" }],
- };
- const text = extractTextFromRequest(request);
- expect(text).toBe("Test");
- });
-
- test("handles empty messages array", () => {
- const request: ChatCompletionRequest = {
- messages: [],
- };
- const text = extractTextFromRequest(request);
- expect(text).toBe("");
- });
-
- test("extracts all message content in order", () => {
- const request: ChatCompletionRequest = {
- messages: [
- { role: "system", content: "System" },
- { role: "user", content: "User1" },
- { role: "assistant", content: "Assistant" },
- { role: "user", content: "User2" },
- ],
- };
- const text = extractTextFromRequest(request);
- expect(text).toBe("System\nUser1\nAssistant\nUser2");
- });
-});
import type { SecretsDetectionConfig } from "../config";
-import type { ChatCompletionRequest } from "../services/llm-client";
-import { extractTextContent } from "../utils/content";
+import type { ChatMessage } from "../services/llm-client";
+import type { ContentPart } from "../utils/content";
import { patternDetectors } from "./patterns";
-import type { SecretsDetectionResult, SecretsMatch, SecretsRedaction } from "./patterns/types";
+import type {
+ MessageSecretsResult,
+ SecretLocation,
+ SecretsDetectionResult,
+ SecretsMatch,
+} from "./patterns/types";
-// Re-export types from patterns module for backwards compatibility
export type {
+ MessageSecretsResult,
SecretEntityType,
+ SecretLocation,
SecretsDetectionResult,
SecretsMatch,
- SecretsRedaction,
} from "./patterns/types";
-/**
- * Extracts all text content from an OpenAI chat completion request
- *
- * Concatenates content from all messages (system, user, assistant) for secrets scanning.
- * Handles both string content (text-only) and array content (multimodal messages).
- *
- * Returns concatenated text for secrets scanning.
- */
-export function extractTextFromRequest(body: ChatCompletionRequest): string {
- return body.messages
- .map((message) => extractTextContent(message.content))
- .filter((text) => text.length > 0)
- .join("\n");
-}
-
/**
* Detects secret material (e.g. private keys, API keys, tokens) in text
*
// Aggregate results from all pattern detectors
const allMatches: SecretsMatch[] = [];
- const allRedactions: SecretsRedaction[] = [];
+ const allLocations: SecretLocation[] = [];
for (const detector of patternDetectors) {
// Skip detectors that don't handle any enabled types
const result = detector.detect(textToScan, enabledTypes);
allMatches.push(...result.matches);
- if (result.redactions) {
- allRedactions.push(...result.redactions);
+ if (result.locations) {
+ allLocations.push(...result.locations);
}
}
- // Sort redactions by start position (descending) for safe replacement
- allRedactions.sort((a, b) => b.start - a.start);
+ // Sort locations by start position (descending) for safe replacement
+ allLocations.sort((a, b) => b.start - a.start);
return {
detected: allMatches.length > 0,
matches: allMatches,
- redactions: allRedactions.length > 0 ? allRedactions : undefined,
+ locations: allLocations.length > 0 ? allLocations : undefined,
+ };
+}
+
+/**
+ * Detects secrets in chat messages with per-part granularity.
+ *
+ * Every text part is scanned on its own with `detectSecrets`, so the returned
+ * locations are offsets into that part's text and masks can be applied without
+ * any cross-part offset mapping.
+ *
+ * Shape of `messageLocations[msgIdx][partIdx]`:
+ * - string content: a single part at index 0
+ * - array content (multimodal): one entry per part; non-text parts get `[]`
+ * - any other content (e.g. null/undefined): no parts (`[]`)
+ *
+ * @param messages - Chat messages to scan
+ * @param config - Secrets detection configuration; nothing is scanned when `enabled` is false
+ * @returns Per-type match counts summed over all scanned parts, plus the per-part
+ *   locations. `detected` is true when any part produced a match or a location.
+ */
+export function detectSecretsInMessages(
+  messages: ChatMessage[],
+  config: SecretsDetectionConfig,
+): MessageSecretsResult {
+  if (!config.enabled) {
+    return {
+      detected: false,
+      matches: [],
+      messageLocations: messages.map(() => []),
+    };
+  }
+
+  // Keyed by the match type itself so no cast is needed when building the result
+  const matchCounts = new Map<SecretsMatch["type"], number>();
+
+  // Scans one text part, folds its match counts into the totals and returns its locations
+  const scanPart = (text: string): SecretLocation[] => {
+    const result = detectSecrets(text, config);
+    for (const match of result.matches) {
+      matchCounts.set(match.type, (matchCounts.get(match.type) ?? 0) + match.count);
+    }
+    return result.locations ?? [];
+  };
+
+  const messageLocations: SecretLocation[][][] = messages.map((message) => {
+    const { content } = message;
+
+    // String content → single part at index 0
+    if (typeof content === "string") {
+      return [scanPart(content)];
+    }
+
+    // Array content (multimodal) → one array per part, empty for non-text parts
+    if (Array.isArray(content)) {
+      return content.map((part: ContentPart) =>
+        part.type === "text" && typeof part.text === "string" ? scanPart(part.text) : [],
+      );
+    }
+
+    // Null/undefined content
+    return [];
+  });
+
+  const matches: SecretsMatch[] = Array.from(matchCounts, ([type, count]) => ({ type, count }));
+  const hasLocations = messageLocations.some((parts) => parts.some((locs) => locs.length > 0));
+
+  return {
+    // detectSecrets reports detection from its matches and treats locations as optional,
+    // so a match without locations must still count as detected here.
+    detected: matches.length > 0 || hasLocations,
+    matches,
+    messageLocations,
};
}
--- /dev/null
+import { describe, expect, test } from "bun:test";
+import { createSecretsResult } from "../test-utils/detection-results";
+import type { SecretLocation } from "./detect";
+import {
+ createSecretsMaskingContext,
+ flushSecretsMaskingBuffer,
+ maskMessages,
+ maskSecrets,
+ unmaskSecrets,
+ unmaskSecretsResponse,
+ unmaskSecretsStreamChunk,
+} from "./mask";
+
+const sampleSecret = "sk-proj-abc123def456ghi789jkl012mno345pqr678stu901vwx";
+
+describe("secrets placeholder format", () => {
+  test("uses [[SECRET_MASKED_TYPE_N]] format", () => {
+    // Offsets are derived from the prefix so they cannot drift from the text
+    const prefix = "My API key is ";
+    const locations: SecretLocation[] = [
+      { start: prefix.length, end: prefix.length + sampleSecret.length, type: "API_KEY_OPENAI" },
+    ];
+
+    const { masked } = maskSecrets(prefix + sampleSecret, locations);
+
+    expect(masked).toBe("My API key is [[SECRET_MASKED_API_KEY_OPENAI_1]]");
+  });
+
+  test("increments counter per secret type", () => {
+    const anotherSecret = "sk-proj-xyz789abc123def456ghi789jkl012mno345pqr678";
+    const firstLabel = "Key1: ";
+    const secondLabel = " Key2: ";
+    const firstStart = firstLabel.length;
+    const secondStart = firstStart + sampleSecret.length + secondLabel.length;
+    const locations: SecretLocation[] = [
+      { start: firstStart, end: firstStart + sampleSecret.length, type: "API_KEY_OPENAI" },
+      { start: secondStart, end: secondStart + anotherSecret.length, type: "API_KEY_OPENAI" },
+    ];
+
+    const { masked } = maskSecrets(
+      firstLabel + sampleSecret + secondLabel + anotherSecret,
+      locations,
+    );
+
+    // Two distinct values of the same type get consecutive counters
+    expect(masked).toContain("[[SECRET_MASKED_API_KEY_OPENAI_1]]");
+    expect(masked).toContain("[[SECRET_MASKED_API_KEY_OPENAI_2]]");
+  });
+
+  test("tracks different secret types separately", () => {
+    const awsKey = "AKIAIOSFODNN7EXAMPLE";
+    const openAiLabel = "OpenAI: ";
+    const awsLabel = " AWS: ";
+    const openAiStart = openAiLabel.length;
+    const awsStart = openAiStart + sampleSecret.length + awsLabel.length;
+    const locations: SecretLocation[] = [
+      { start: openAiStart, end: openAiStart + sampleSecret.length, type: "API_KEY_OPENAI" },
+      { start: awsStart, end: awsStart + awsKey.length, type: "API_KEY_AWS" },
+    ];
+
+    const { masked } = maskSecrets(openAiLabel + sampleSecret + awsLabel + awsKey, locations);
+
+    // Each type keeps its own counter, so both placeholders end in _1
+    expect(masked).toContain("[[SECRET_MASKED_API_KEY_OPENAI_1]]");
+    expect(masked).toContain("[[SECRET_MASKED_API_KEY_AWS_1]]");
+  });
+});
+
+describe("maskMessages with MessageSecretsResult", () => {
+  // Location of `sampleSecret` inside a part whose text starts with `prefix`
+  const sampleSecretAfter = (prefix: string): SecretLocation => ({
+    start: prefix.length,
+    end: prefix.length + sampleSecret.length,
+    type: "API_KEY_OPENAI",
+  });
+
+  test("masks secrets in multiple messages", () => {
+    const prefix = "My key is ";
+    const messages = [
+      { role: "user" as const, content: prefix + sampleSecret },
+      { role: "assistant" as const, content: "I'll help you with that." },
+    ];
+    // One string part per message; only the first contains a secret
+    const detection = createSecretsResult([[[sampleSecretAfter(prefix)]], [[]]]);
+
+    const result = maskMessages(messages, detection);
+    const [userMessage, assistantMessage] = result.masked;
+
+    expect(userMessage.content).toContain("[[SECRET_MASKED_API_KEY_OPENAI_1]]");
+    expect(userMessage.content).not.toContain(sampleSecret);
+    expect(assistantMessage.content).toBe("I'll help you with that.");
+    expect(Object.keys(result.context.mapping)).toHaveLength(1);
+  });
+
+  test("shares context across messages - same secret gets same placeholder", () => {
+    const firstPrefix = "Key1: ";
+    const secondPrefix = "Key2: ";
+    const messages = [
+      { role: "user" as const, content: firstPrefix + sampleSecret },
+      { role: "user" as const, content: secondPrefix + sampleSecret },
+    ];
+    const detection = createSecretsResult([
+      [[sampleSecretAfter(firstPrefix)]],
+      [[sampleSecretAfter(secondPrefix)]],
+    ]);
+
+    const result = maskMessages(messages, detection);
+
+    expect(result.masked[0].content).toBe("Key1: [[SECRET_MASKED_API_KEY_OPENAI_1]]");
+    expect(result.masked[1].content).toBe("Key2: [[SECRET_MASKED_API_KEY_OPENAI_1]]");
+    expect(Object.keys(result.context.mapping)).toHaveLength(1);
+  });
+
+  test("handles multimodal array content", () => {
+    const prefix = "Key: ";
+    const messages = [
+      {
+        role: "user" as const,
+        content: [
+          { type: "text", text: prefix + sampleSecret },
+          { type: "image_url", image_url: { url: "https://example.com/img.jpg" } },
+        ],
+      },
+    ];
+    // Part 0 is the text part with the secret, part 1 is the image part
+    const detection = createSecretsResult([[[sampleSecretAfter(prefix)], []]]);
+
+    const result = maskMessages(messages, detection);
+
+    const parts = result.masked[0].content as Array<{ type: string; text?: string }>;
+    expect(parts[0].text).toBe("Key: [[SECRET_MASKED_API_KEY_OPENAI_1]]");
+    expect(parts[1].type).toBe("image_url");
+  });
+});
+
+describe("streaming with secrets placeholders", () => {
+  // Context that already maps the first OpenAI placeholder to `sampleSecret`
+  const contextWithSampleSecret = () => {
+    const context = createSecretsMaskingContext();
+    context.mapping["[[SECRET_MASKED_API_KEY_OPENAI_1]]"] = sampleSecret;
+    return context;
+  };
+
+  test("buffers partial [[SECRET_MASKED placeholder", () => {
+    const chunk = unmaskSecretsStreamChunk("", "Key: [[SECRET_MAS", contextWithSampleSecret());
+
+    // Text before the partial placeholder is emitted, the partial placeholder is held back
+    expect(chunk.output).toBe("Key: ");
+    expect(chunk.remainingBuffer).toBe("[[SECRET_MAS");
+  });
+
+  test("completes buffered placeholder across chunks", () => {
+    const chunk = unmaskSecretsStreamChunk(
+      "[[SECRET_MAS",
+      "KED_API_KEY_OPENAI_1]] done",
+      contextWithSampleSecret(),
+    );
+
+    expect(chunk.output).toBe(`${sampleSecret} done`);
+    expect(chunk.remainingBuffer).toBe("");
+  });
+
+  test("flushes incomplete buffer as-is", () => {
+    const flushed = flushSecretsMaskingBuffer("[[SECRET_MAS", createSecretsMaskingContext());
+    expect(flushed).toBe("[[SECRET_MAS");
+  });
+});
+
+describe("mask -> unmask roundtrip", () => {
+  test("preserves original data through roundtrip", () => {
+    // Multi-line text with a leading and a trailing newline
+    const originalText = [
+      "",
+      "Here are my credentials:",
+      `OpenAI API Key: ${sampleSecret}`,
+      "Please store them securely.",
+      "",
+    ].join("\n");
+    const secretStart = originalText.indexOf(sampleSecret);
+    const locations: SecretLocation[] = [
+      { start: secretStart, end: secretStart + sampleSecret.length, type: "API_KEY_OPENAI" },
+    ];
+
+    const maskResult = maskSecrets(originalText, locations);
+
+    expect(maskResult.masked).not.toContain(sampleSecret);
+    expect(maskResult.masked).toContain("[[SECRET_MASKED_API_KEY_OPENAI_1]]");
+
+    // Unmasking with the same context must give back the exact input
+    expect(unmaskSecrets(maskResult.masked, maskResult.context)).toBe(originalText);
+  });
+});
+
+describe("unmaskSecretsResponse", () => {
+  test("unmasks all choices in response", () => {
+    const placeholder = "[[SECRET_MASKED_API_KEY_OPENAI_1]]";
+    const context = createSecretsMaskingContext();
+    context.mapping[placeholder] = sampleSecret;
+
+    const maskedResponse = {
+      id: "test",
+      object: "chat.completion" as const,
+      created: Date.now(),
+      model: "gpt-4",
+      choices: [
+        {
+          index: 0,
+          message: {
+            role: "assistant" as const,
+            content: `Your key is ${placeholder}`,
+          },
+          finish_reason: "stop" as const,
+        },
+      ],
+    };
+
+    const unmasked = unmaskSecretsResponse(maskedResponse, context);
+
+    expect(unmasked.choices[0].message.content).toBe(`Your key is ${sampleSecret}`);
+  });
+
+  test("preserves response structure", () => {
+    // No mappings in the context: every top-level field must come back unchanged
+    const usage = { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 };
+    const plainResponse = {
+      id: "test-id",
+      object: "chat.completion" as const,
+      created: 12345,
+      model: "gpt-4-turbo",
+      choices: [
+        {
+          index: 0,
+          message: { role: "assistant" as const, content: "Hello" },
+          finish_reason: "stop" as const,
+        },
+      ],
+      usage,
+    };
+
+    const unmasked = unmaskSecretsResponse(plainResponse, createSecretsMaskingContext());
+
+    expect(unmasked.id).toBe("test-id");
+    expect(unmasked.model).toBe("gpt-4-turbo");
+    expect(unmasked.usage).toEqual({ prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 });
+  });
+});
+
+describe("edge cases", () => {
+  test("returns original text when no locations", () => {
+    const { masked, context } = maskSecrets("Hello world", []);
+
+    expect(masked).toBe("Hello world");
+    expect(Object.keys(context.mapping)).toHaveLength(0);
+  });
+
+  test("reuses placeholder for duplicate secret values", () => {
+    const firstLabel = "Key1: ";
+    const secondLabel = " Key2: ";
+    const firstStart = firstLabel.length;
+    const secondStart = firstStart + sampleSecret.length + secondLabel.length;
+    const locations: SecretLocation[] = [
+      { start: firstStart, end: firstStart + sampleSecret.length, type: "API_KEY_OPENAI" },
+      { start: secondStart, end: secondStart + sampleSecret.length, type: "API_KEY_OPENAI" },
+    ];
+
+    const { masked, context } = maskSecrets(
+      firstLabel + sampleSecret + secondLabel + sampleSecret,
+      locations,
+    );
+
+    // The same value appears twice, so one placeholder and one mapping entry are expected
+    expect(masked).toBe(
+      "Key1: [[SECRET_MASKED_API_KEY_OPENAI_1]] Key2: [[SECRET_MASKED_API_KEY_OPENAI_1]]",
+    );
+    expect(Object.keys(context.mapping)).toHaveLength(1);
+  });
+
+  test("preserves context across multiple calls", () => {
+    const sharedContext = createSecretsMaskingContext();
+    const firstPrefix = "Key: ";
+    const secondPrefix = "Another: ";
+    const anotherSecret = "sk-proj-xyz789abc123def456ghi789jkl012mno345pqr678";
+
+    maskSecrets(
+      firstPrefix + sampleSecret,
+      [
+        {
+          start: firstPrefix.length,
+          end: firstPrefix.length + sampleSecret.length,
+          type: "API_KEY_OPENAI",
+        },
+      ],
+      sharedContext,
+    );
+
+    const second = maskSecrets(
+      secondPrefix + anotherSecret,
+      [
+        {
+          start: secondPrefix.length,
+          end: secondPrefix.length + anotherSecret.length,
+          type: "API_KEY_OPENAI",
+        },
+      ],
+      sharedContext,
+    );
+
+    // The counter continues from the first call because the context is shared
+    expect(second.masked).toBe("Another: [[SECRET_MASKED_API_KEY_OPENAI_2]]");
+    expect(Object.keys(sharedContext.mapping)).toHaveLength(2);
+  });
+});
--- /dev/null
+import type { ChatCompletionResponse, ChatMessage } from "../services/llm-client";
+import { resolveOverlaps } from "../utils/conflict-resolver";
+import {
+ createPlaceholderContext,
+ flushBuffer,
+ incrementAndGenerate,
+ type MaskResult,
+ type PlaceholderContext,
+ processStreamChunk,
+ replaceWithPlaceholders,
+ restorePlaceholders,
+ restoreResponsePlaceholders,
+ transformMessagesPerPart,
+} from "../utils/message-transform";
+import { generateSecretPlaceholder } from "../utils/placeholders";
+import type { MessageSecretsResult, SecretLocation } from "./detect";
+
+export type { MaskResult } from "../utils/message-transform";
+
+/**
+ * Builds the placeholder context used to mask secrets for one request.
+ *
+ * Thin wrapper over `createPlaceholderContext` that gives the secrets module
+ * its own entry point.
+ *
+ * @returns The context produced by `createPlaceholderContext`
+ */
+export function createSecretsMaskingContext(): PlaceholderContext {
+  const context = createPlaceholderContext();
+  return context;
+}
+
+/**
+ * Produces a placeholder for the given secret type.
+ *
+ * Delegates to `incrementAndGenerate`, using `generateSecretPlaceholder` as the formatter.
+ * Placeholders look like [[SECRET_MASKED_{TYPE}_{N}]], e.g. [[SECRET_MASKED_API_KEY_OPENAI_1]].
+ *
+ * @param secretType - Secret type the placeholder stands for
+ * @param context - Masking context handed to `incrementAndGenerate`
+ */
+function generatePlaceholder(secretType: string, context: PlaceholderContext): string {
+  const placeholder = incrementAndGenerate(secretType, context, generateSecretPlaceholder);
+  return placeholder;
+}
+
+/**
+ * Replaces each located secret in `text` with a placeholder.
+ *
+ * @param text - Text to mask
+ * @param locations - Secret spans, as offsets into `text`
+ * @param context - Optional context to reuse; the same object is passed to
+ *   `replaceWithPlaceholders` and returned. A new one is created when omitted.
+ * @returns The masked text together with the context that was used
+ */
+export function maskSecrets(
+  text: string,
+  locations: SecretLocation[],
+  context?: PlaceholderContext,
+): MaskResult {
+  const activeContext = context ?? createSecretsMaskingContext();
+
+  return {
+    masked: replaceWithPlaceholders(
+      text,
+      locations,
+      activeContext,
+      (location) => location.type,
+      generatePlaceholder,
+      resolveOverlaps,
+    ),
+    context: activeContext,
+  };
+}
+
+/**
+ * Restores the original secrets in text that contains secret placeholders.
+ *
+ * @param text - Text that may contain secret placeholders
+ * @param context - Masking context whose mappings drive the restoration
+ * @returns The text as returned by `restorePlaceholders`
+ */
+export function unmaskSecrets(text: string, context: PlaceholderContext): string {
+  const restored = restorePlaceholders(text, context);
+  return restored;
+}
+
+/**
+ * Masks secrets in chat messages from per-part detection results.
+ *
+ * Iteration over messages and parts is delegated to `transformMessagesPerPart`;
+ * each text part is masked with `maskSecrets` against one context created here
+ * for the whole call.
+ *
+ * @param messages - Messages to mask
+ * @param detection - Detection result whose `messageLocations` supplies the per-part locations
+ * @returns The transformed messages and the context created for this call
+ */
+export function maskMessages(
+  messages: ChatMessage[],
+  detection: MessageSecretsResult,
+): { masked: ChatMessage[]; context: PlaceholderContext } {
+  const context = createSecretsMaskingContext();
+
+  return {
+    masked: transformMessagesPerPart(
+      messages,
+      detection.messageLocations,
+      (text, locations, ctx) => {
+        // Only the masked string is handed back to the transformer
+        const { masked } = maskSecrets(text, locations, ctx);
+        return masked;
+      },
+      context,
+    ),
+    context,
+  };
+}
+
+/**
+ * Streaming unmask helper: feeds the pending buffer and the next chunk to
+ * `processStreamChunk`, with `unmaskSecrets` as the unmasking function.
+ *
+ * @param buffer - Text held back from earlier chunks
+ * @param newChunk - Newly received chunk
+ * @param context - Masking context with the placeholder mappings
+ * @returns `output` that can be emitted now and `remainingBuffer` to pass to the next
+ *   call, e.g. a trailing partial placeholder such as "[[SECRET_MAS" stays buffered
+ */
+export function unmaskSecretsStreamChunk(
+  buffer: string,
+  newChunk: string,
+  context: PlaceholderContext,
+): { output: string; remainingBuffer: string } {
+  const chunkResult = processStreamChunk(buffer, newChunk, context, unmaskSecrets);
+  return chunkResult;
+}
+
+/**
+ * Flushes whatever is still buffered once the stream has ended.
+ *
+ * Delegates to `flushBuffer` with `unmaskSecrets` as the unmasking function.
+ *
+ * @param buffer - Text left over from `unmaskSecretsStreamChunk`
+ * @param context - Masking context with the placeholder mappings
+ * @returns The final text to emit
+ */
+export function flushSecretsMaskingBuffer(buffer: string, context: PlaceholderContext): string {
+  const flushed = flushBuffer(buffer, context, unmaskSecrets);
+  return flushed;
+}
+
+/**
+ * Unmasks a chat completion response by restoring secret placeholders.
+ *
+ * Delegates to `restoreResponsePlaceholders`.
+ *
+ * @param response - Response returned by the provider
+ * @param context - Masking context with the placeholder mappings
+ * @returns The response as returned by `restoreResponsePlaceholders`
+ */
+export function unmaskSecretsResponse(
+  response: ChatCompletionResponse,
+  context: PlaceholderContext,
+): ChatCompletionResponse {
+  const unmasked = restoreResponsePlaceholders(response, context);
+  return unmasked;
+}
import { describe, expect, test } from "bun:test";
+import type { PIIDetectionResult, PIIEntity } from "../pii/detect";
+import { maskMessages } from "../pii/mask";
import type { ChatMessage } from "../services/llm-client";
-import { maskMessages } from "../services/masking";
-import type { PIIEntity } from "../services/pii-detector";
import type { ContentPart } from "../utils/content";
+/**
+ * Builds a PIIDetectionResult fixture from per-part entities.
+ *
+ * `messageEntities[msgIdx][partIdx]` lists the entities of one part. The fully
+ * flattened list is used as `allEntities` and decides `hasPII`; the remaining
+ * fields are fixed test values.
+ */
+function createPIIResult(messageEntities: PIIEntity[][][]): PIIDetectionResult {
+  const allEntities = messageEntities.flat(2);
+
+  return {
+    hasPII: allEntities.length > 0,
+    messageEntities,
+    allEntities,
+    scanTimeMs: 0,
+    language: "en",
+    languageFallback: false,
+  };
+}
+
describe("Multimodal content handling", () => {
- describe("PII masking with offset tracking", () => {
+ describe("PII masking with per-part entities", () => {
test("masks PII in multimodal array content", () => {
const messages: ChatMessage[] = [
{
},
];
- // Concatenated text: "My email is john@example.com and\nmy phone is 555-1234"
- // Entities for this concatenated text:
- const entities: PIIEntity[] = [
- { entity_type: "EMAIL_ADDRESS", start: 12, end: 28, score: 0.9 }, // john@example.com in part 0
- { entity_type: "PHONE_NUMBER", start: 45, end: 53, score: 0.85 }, // 555-1234 in part 2 (after newline)
- ];
-
- const entitiesByMessage = [entities];
+ // Per-part entities: messageEntities[msgIdx][partIdx] = entities
+ const detection = createPIIResult([
+ [
+ // Part 0: email entity (positions relative to part text)
+ [{ entity_type: "EMAIL_ADDRESS", start: 12, end: 28, score: 0.9 }],
+ // Part 1: image, no entities
+ [],
+ // Part 2: phone entity (positions relative to part text)
+ [{ entity_type: "PHONE_NUMBER", start: 12, end: 20, score: 0.85 }],
+ ],
+ ]);
- const { masked } = maskMessages(messages, entitiesByMessage);
+ const { masked } = maskMessages(messages, detection);
// Verify the content is still an array
expect(Array.isArray(masked[0].content)).toBe(true);
});
test("returns masked array instead of original unmasked array", () => {
- // This tests the bug fix: previously array content was extracted and masked,
- // but then the original array was returned unchanged
const messages: ChatMessage[] = [
{
role: "user",
},
];
- const entities: PIIEntity[] = [
- { entity_type: "PERSON", start: 8, end: 13, score: 0.9 }, // Alice
- { entity_type: "EMAIL_ADDRESS", start: 17, end: 33, score: 0.95 }, // alice@secret.com
- ];
+ const detection = createPIIResult([
+ [
+ // Part 0 entities
+ [
+ { entity_type: "PERSON", start: 8, end: 13, score: 0.9 },
+ { entity_type: "EMAIL_ADDRESS", start: 17, end: 33, score: 0.95 },
+ ],
+ ],
+ ]);
- const { masked } = maskMessages(messages, [entities]);
+ const { masked } = maskMessages(messages, detection);
// Verify content is still array
expect(Array.isArray(masked[0].content)).toBe(true);
expect(maskedContent[0].text).toContain("[[EMAIL_ADDRESS_1]]");
});
- test("handles entities spanning multiple parts with proper offsets", () => {
+ test("handles multiple text parts independently", () => {
const messages: ChatMessage[] = [
{
role: "user",
content: [
- { type: "text", text: "First part with email@" },
- { type: "text", text: "example.com in two parts" },
+ { type: "text", text: "First: john@example.com" },
+ { type: "text", text: "Second: jane@example.com" },
],
},
];
- // In concatenated text: "First part with email@\nexample.com in two parts"
- // Email spans from position 16 to 39 (crossing the newline at position 22)
- const entities: PIIEntity[] = [
- { entity_type: "EMAIL_ADDRESS", start: 16, end: 34, score: 0.9 },
- ];
+ const detection = createPIIResult([
+ [
+ // Part 0 entity
+ [{ entity_type: "EMAIL_ADDRESS", start: 7, end: 23, score: 0.9 }],
+ // Part 1 entity
+ [{ entity_type: "EMAIL_ADDRESS", start: 8, end: 24, score: 0.9 }],
+ ],
+ ]);
- const { masked } = maskMessages(messages, [entities]);
+ const { masked } = maskMessages(messages, detection);
const maskedContent = masked[0].content as ContentPart[];
- // Both parts should be affected by the email entity
- // Part 0: "First part with [[EMAIL" or similar
- // Part 1: "ADDRESS_1]] in two parts" or similar
- // The exact split depends on how the masking handles cross-boundary entities
+ expect(maskedContent[0].text).toBe("First: [[EMAIL_ADDRESS_1]]");
+ expect(maskedContent[1].text).toBe("Second: [[EMAIL_ADDRESS_2]]");
+ });
+
+ test("handles mixed string and array content messages", () => {
+ const messages: ChatMessage[] = [
+ { role: "system", content: "You are helpful" },
+ {
+ role: "user",
+ content: [{ type: "text", text: "My name is John" }],
+ },
+ { role: "assistant", content: "Hello John!" },
+ ];
+
+ const detection = createPIIResult([
+ // Message 0 (system): no PII
+ [[]],
+ // Message 1 (user multimodal): PII in part 0
+ [[{ entity_type: "PERSON", start: 11, end: 15, score: 0.9 }]],
+ // Message 2 (assistant): PII in part 0
+ [[{ entity_type: "PERSON", start: 6, end: 10, score: 0.9 }]],
+ ]);
- // At minimum, verify that the entity is masked somewhere
- const fullMasked = maskedContent
- .filter((p) => p.type === "text")
- .map((p) => p.text)
- .join("\n");
+ const { masked } = maskMessages(messages, detection);
- expect(fullMasked).toContain("[[EMAIL_ADDRESS_");
- expect(fullMasked).not.toContain("email@example.com");
+ expect(masked[0].content).toBe("You are helpful");
+ expect((masked[1].content as ContentPart[])[0].text).toBe("My name is [[PERSON_1]]");
+ expect(masked[2].content).toBe("Hello [[PERSON_1]]!");
});
});
});
-import type { PatternDetector, SecretsMatch, SecretsRedaction } from "./types";
+import type { PatternDetector, SecretLocation, SecretsMatch } from "./types";
import { detectPattern } from "./utils";
/**
detect(text: string, enabledTypes: Set<string>) {
const matches: SecretsMatch[] = [];
- const redactions: SecretsRedaction[] = [];
+ const locations: SecretLocation[] = [];
// OpenAI API keys: sk-... followed by alphanumeric chars
// Modern format: sk-proj-... or sk-... with 48+ total chars
if (enabledTypes.has("API_KEY_OPENAI")) {
const openaiPattern = /sk-[a-zA-Z0-9_-]{45,}/g;
- detectPattern(text, openaiPattern, "API_KEY_OPENAI", matches, redactions);
+ detectPattern(text, openaiPattern, "API_KEY_OPENAI", matches, locations);
}
// AWS access keys: AKIA followed by 16 uppercase alphanumeric chars
if (enabledTypes.has("API_KEY_AWS")) {
const awsPattern = /AKIA[0-9A-Z]{16}/g;
- detectPattern(text, awsPattern, "API_KEY_AWS", matches, redactions);
+ detectPattern(text, awsPattern, "API_KEY_AWS", matches, locations);
}
// GitHub tokens: ghp_, gho_, ghu_, ghs_, ghr_ followed by 36+ alphanumeric chars
if (enabledTypes.has("API_KEY_GITHUB")) {
const githubPattern = /gh[pousr]_[a-zA-Z0-9]{36,}/g;
- detectPattern(text, githubPattern, "API_KEY_GITHUB", matches, redactions);
+ detectPattern(text, githubPattern, "API_KEY_GITHUB", matches, locations);
}
return {
detected: matches.length > 0,
matches,
- redactions: redactions.length > 0 ? redactions : undefined,
+ locations: locations.length > 0 ? locations : undefined,
};
},
};
-import type { PatternDetector, SecretsMatch, SecretsRedaction } from "./types";
+import type { PatternDetector, SecretLocation, SecretsMatch } from "./types";
import { detectPattern } from "./utils";
/**
detect(text: string, enabledTypes: Set<string>) {
const matches: SecretsMatch[] = [];
- const redactions: SecretsRedaction[] = [];
+ const locations: SecretLocation[] = [];
// Environment variable password patterns: _PASSWORD or _PWD suffix with value (8+ chars)
// Case-insensitive for variable name, supports = and : assignment, quoted/unquoted values
if (enabledTypes.has("ENV_PASSWORD")) {
const passwordPattern =
/[A-Za-z_][A-Za-z0-9_]*(?:PASSWORD|_PWD)\s*[=:]\s*['"]?[^\s'"]{8,}['"]?/gi;
- detectPattern(text, passwordPattern, "ENV_PASSWORD", matches, redactions);
+ detectPattern(text, passwordPattern, "ENV_PASSWORD", matches, locations);
}
// Environment variable secret patterns: _SECRET suffix with value (8+ chars)
// Case-insensitive for variable name, supports = and : assignment, quoted/unquoted values
if (enabledTypes.has("ENV_SECRET")) {
const secretPattern = /[A-Za-z_][A-Za-z0-9_]*_SECRET\s*[=:]\s*['"]?[^\s'"]{8,}['"]?/gi;
- detectPattern(text, secretPattern, "ENV_SECRET", matches, redactions);
+ detectPattern(text, secretPattern, "ENV_SECRET", matches, locations);
}
// Database connection strings with embedded passwords (user:password@host format)
if (enabledTypes.has("CONNECTION_STRING")) {
const connectionPattern =
/(?:postgres(?:ql)?|mysql|mariadb|mongodb(?:\+srv)?|redis|amqps?):\/\/[^:]+:[^@\s]+@[^\s'"]+/gi;
- detectPattern(text, connectionPattern, "CONNECTION_STRING", matches, redactions);
+ detectPattern(text, connectionPattern, "CONNECTION_STRING", matches, locations);
}
return {
detected: matches.length > 0,
matches,
- redactions: redactions.length > 0 ? redactions : undefined,
+ locations: locations.length > 0 ? locations : undefined,
};
},
};
import type {
PatternDetector,
+ SecretLocation,
SecretsDetectionResult,
SecretsMatch,
- SecretsRedaction,
} from "./types";
import { detectPattern } from "./utils";
detect(text: string, enabledTypes: Set<string>): SecretsDetectionResult {
const matches: SecretsMatch[] = [];
- const redactions: SecretsRedaction[] = [];
+ const locations: SecretLocation[] = [];
// OpenSSH private key pattern
if (enabledTypes.has("OPENSSH_PRIVATE_KEY")) {
const opensshPattern =
/-----BEGIN OPENSSH PRIVATE KEY-----[\s\S]*?-----END OPENSSH PRIVATE KEY-----/g;
- detectPattern(text, opensshPattern, "OPENSSH_PRIVATE_KEY", matches, redactions);
+ detectPattern(text, opensshPattern, "OPENSSH_PRIVATE_KEY", matches, locations);
}
// PEM private key patterns
// RSA PRIVATE KEY
const rsaPattern = /-----BEGIN RSA PRIVATE KEY-----[\s\S]*?-----END RSA PRIVATE KEY-----/g;
- detectPattern(text, rsaPattern, "PEM_PRIVATE_KEY", matches, redactions, matchedPositions);
+ detectPattern(text, rsaPattern, "PEM_PRIVATE_KEY", matches, locations, matchedPositions);
// Remove PEM_PRIVATE_KEY from matches to accumulate all PEM types together
const pemMatch = matches.find((m) => m.type === "PEM_PRIVATE_KEY");
privateKeyPattern,
"PEM_PRIVATE_KEY",
tempMatches,
- redactions,
+ locations,
matchedPositions,
);
totalPemCount += tempMatches[0]?.count || 0;
encryptedPattern,
"PEM_PRIVATE_KEY",
tempMatches2,
- redactions,
+ locations,
matchedPositions,
);
totalPemCount += tempMatches2[0]?.count || 0;
return {
detected: matches.length > 0,
matches,
- redactions: redactions.length > 0 ? redactions : undefined,
+ locations: locations.length > 0 ? locations : undefined,
};
},
};
-import type { PatternDetector, SecretsMatch, SecretsRedaction } from "./types";
+import type { PatternDetector, SecretLocation, SecretsMatch } from "./types";
import { detectPattern } from "./utils";
/**
detect(text: string, enabledTypes: Set<string>) {
const matches: SecretsMatch[] = [];
- const redactions: SecretsRedaction[] = [];
+ const locations: SecretLocation[] = [];
// JWT tokens: three base64url segments separated by dots
// Header starts with eyJ (base64 for {"...), minimum 20 chars per segment
if (enabledTypes.has("JWT_TOKEN")) {
const jwtPattern = /eyJ[a-zA-Z0-9_-]{20,}\.eyJ[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}/g;
- detectPattern(text, jwtPattern, "JWT_TOKEN", matches, redactions);
+ detectPattern(text, jwtPattern, "JWT_TOKEN", matches, locations);
}
// Bearer tokens in Authorization-style contexts
// Matches "Bearer " followed by a token (at least 40 chars to reduce placeholder matches)
if (enabledTypes.has("BEARER_TOKEN")) {
const bearerPattern = /Bearer\s+[a-zA-Z0-9._-]{40,}/gi;
- detectPattern(text, bearerPattern, "BEARER_TOKEN", matches, redactions);
+ detectPattern(text, bearerPattern, "BEARER_TOKEN", matches, locations);
}
return {
detected: matches.length > 0,
matches,
- redactions: redactions.length > 0 ? redactions : undefined,
+ locations: locations.length > 0 ? locations : undefined,
};
},
};
count: number;
}
-export interface SecretsRedaction {
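+/**
+ * Location of a detected secret in text.
+ * `start` and `end` are offsets into the scanned text; detectPattern sets
+ * `end` to the match index plus the match length, so `end` is exclusive.
+ */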
+export interface SecretLocation {
start: number;
end: number;
type: SecretEntityType;
export interface SecretsDetectionResult {
detected: boolean;
matches: SecretsMatch[];
- redactions?: SecretsRedaction[];
+ locations?: SecretLocation[];
+}
+
+/**
+ * Per-message, per-part secrets detection result.
+ *
+ * Structure: messageLocations[msgIdx][partIdx] = locations for that part.
+ * This mirrors the indexing used for per-part PII entities
+ * (messageEntities[msgIdx][partIdx]).
+ * NOTE(review): `detected` and `matches` presumably summarise every message
+ * in the request — confirm against the code that builds this result.
+ */
+export interface MessageSecretsResult {
+ detected: boolean;
+ matches: SecretsMatch[];
+ /** Per-message, per-part secret locations */
+ messageLocations: SecretLocation[][][];
}
/**
-import type { SecretsMatch, SecretsRedaction } from "./types";
+import type { SecretLocation, SecretsMatch } from "./types";
/**
- * Helper to detect secrets matching a pattern and collect matches/redactions
+ * Helper to detect secrets matching a pattern and collect matches/locations
*/
export function detectPattern(
text: string,
pattern: RegExp,
entityType: string,
matches: SecretsMatch[],
- redactions: SecretsRedaction[],
+ locations: SecretLocation[],
existingPositions?: Set<number>,
): number {
let count = 0;
count++;
existingPositions?.add(match.index);
- redactions.push({
+ locations.push({
start: match.index,
end: match.index + match[0].length,
- type: entityType as SecretsRedaction["type"],
+ type: entityType as SecretLocation["type"],
});
}
}
+++ /dev/null
-import { describe, expect, test } from "bun:test";
-import type { SecretsRedaction } from "./detect";
-import {
- createRedactionContext,
- flushRedactionBuffer,
- redactMessagesSecrets,
- redactSecrets,
- unredactResponse,
- unredactSecrets,
- unredactStreamChunk,
-} from "./redact";
-
-const sampleSecret = "sk-proj-abc123def456ghi789jkl012mno345pqr678stu901vwx";
-
-describe("redactSecrets", () => {
- test("returns original text when no redactions", () => {
- const text = "Hello world";
- const result = redactSecrets(text, []);
- expect(result.redacted).toBe("Hello world");
- expect(Object.keys(result.context.mapping)).toHaveLength(0);
- });
-
- test("redacts single secret", () => {
- const text = `My API key is ${sampleSecret}`;
- const redactions: SecretsRedaction[] = [
- { start: 14, end: 14 + sampleSecret.length, type: "API_KEY_OPENAI" },
- ];
- const result = redactSecrets(text, redactions);
-
- expect(result.redacted).toBe("My API key is [[SECRET_REDACTED_API_KEY_OPENAI_1]]");
- expect(result.context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"]).toBe(sampleSecret);
- });
-
- test("redacts multiple secrets of same type", () => {
- const text = `Key1: ${sampleSecret} Key2: ${sampleSecret}`;
- const redactions: SecretsRedaction[] = [
- { start: 6, end: 6 + sampleSecret.length, type: "API_KEY_OPENAI" },
- {
- start: 6 + sampleSecret.length + 7,
- end: 6 + sampleSecret.length * 2 + 7,
- type: "API_KEY_OPENAI",
- },
- ];
- const result = redactSecrets(text, redactions);
-
- // Same secret value should get same placeholder
- expect(result.redacted).toBe(
- "Key1: [[SECRET_REDACTED_API_KEY_OPENAI_1]] Key2: [[SECRET_REDACTED_API_KEY_OPENAI_1]]",
- );
- expect(Object.keys(result.context.mapping)).toHaveLength(1);
- });
-
- test("redacts multiple secrets of different types", () => {
- const awsKey = "AKIAIOSFODNN7EXAMPLE";
- const text = `OpenAI: ${sampleSecret} AWS: ${awsKey}`;
- const redactions: SecretsRedaction[] = [
- { start: 8, end: 8 + sampleSecret.length, type: "API_KEY_OPENAI" },
- {
- start: 8 + sampleSecret.length + 6,
- end: 8 + sampleSecret.length + 6 + awsKey.length,
- type: "API_KEY_AWS",
- },
- ];
- const result = redactSecrets(text, redactions);
-
- expect(result.redacted).toContain("[[SECRET_REDACTED_API_KEY_OPENAI_1]]");
- expect(result.redacted).toContain("[[SECRET_REDACTED_API_KEY_AWS_1]]");
- expect(Object.keys(result.context.mapping)).toHaveLength(2);
- });
-
- test("preserves context across multiple calls", () => {
- const context = createRedactionContext();
- const text1 = `Key: ${sampleSecret}`;
- const redactions1: SecretsRedaction[] = [
- { start: 5, end: 5 + sampleSecret.length, type: "API_KEY_OPENAI" },
- ];
- redactSecrets(text1, redactions1, context);
-
- const anotherSecret = "sk-proj-xyz789abc123def456ghi789jkl012mno345pqr678";
- const text2 = `Another: ${anotherSecret}`;
- const redactions2: SecretsRedaction[] = [
- { start: 9, end: 9 + anotherSecret.length, type: "API_KEY_OPENAI" },
- ];
- const result2 = redactSecrets(text2, redactions2, context);
-
- // Second secret should get incremented counter
- expect(result2.redacted).toBe("Another: [[SECRET_REDACTED_API_KEY_OPENAI_2]]");
- expect(Object.keys(context.mapping)).toHaveLength(2);
- });
-});
-
-describe("unredactSecrets", () => {
- test("returns original text when no mappings", () => {
- const context = createRedactionContext();
- const text = "Hello world";
- const result = unredactSecrets(text, context);
- expect(result).toBe("Hello world");
- });
-
- test("restores single secret", () => {
- const context = createRedactionContext();
- context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
- const text = "My API key is [[SECRET_REDACTED_API_KEY_OPENAI_1]]";
- const result = unredactSecrets(text, context);
-
- expect(result).toBe(`My API key is ${sampleSecret}`);
- });
-
- test("restores multiple secrets", () => {
- const context = createRedactionContext();
- const awsKey = "AKIAIOSFODNN7EXAMPLE";
- context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
- context.mapping["[[SECRET_REDACTED_API_KEY_AWS_1]]"] = awsKey;
-
- const text =
- "OpenAI: [[SECRET_REDACTED_API_KEY_OPENAI_1]] AWS: [[SECRET_REDACTED_API_KEY_AWS_1]]";
- const result = unredactSecrets(text, context);
-
- expect(result).toBe(`OpenAI: ${sampleSecret} AWS: ${awsKey}`);
- });
-
- test("restores repeated placeholders", () => {
- const context = createRedactionContext();
- context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
- const text =
- "Key1: [[SECRET_REDACTED_API_KEY_OPENAI_1]] Key2: [[SECRET_REDACTED_API_KEY_OPENAI_1]]";
- const result = unredactSecrets(text, context);
-
- expect(result).toBe(`Key1: ${sampleSecret} Key2: ${sampleSecret}`);
- });
-});
-
-describe("redact -> unredact roundtrip", () => {
- test("preserves original data through roundtrip", () => {
- const originalText = `
-Here are my credentials:
-OpenAI API Key: ${sampleSecret}
-Please store them securely.
-`;
- const redactions: SecretsRedaction[] = [
- {
- start: originalText.indexOf(sampleSecret),
- end: originalText.indexOf(sampleSecret) + sampleSecret.length,
- type: "API_KEY_OPENAI",
- },
- ];
-
- const { redacted, context } = redactSecrets(originalText, redactions);
-
- // Verify secret is not in redacted text
- expect(redacted).not.toContain(sampleSecret);
- expect(redacted).toContain("[[SECRET_REDACTED_API_KEY_OPENAI_1]]");
-
- // Unredact and verify original is restored
- const restored = unredactSecrets(redacted, context);
- expect(restored).toBe(originalText);
- });
-
- test("handles empty redactions array", () => {
- const text = "No secrets here";
- const { redacted, context } = redactSecrets(text, []);
- const restored = unredactSecrets(redacted, context);
- expect(restored).toBe(text);
- });
-});
-
-describe("redactMessagesSecrets", () => {
- test("redacts secrets in multiple messages", () => {
- const messages = [
- { role: "user" as const, content: `My key is ${sampleSecret}` },
- { role: "assistant" as const, content: "I'll help you with that." },
- ];
- const redactionsByMessage: SecretsRedaction[][] = [
- [{ start: 10, end: 10 + sampleSecret.length, type: "API_KEY_OPENAI" }],
- [],
- ];
-
- const { redacted, context } = redactMessagesSecrets(messages, redactionsByMessage);
-
- expect(redacted[0].content).toContain("[[SECRET_REDACTED_API_KEY_OPENAI_1]]");
- expect(redacted[0].content).not.toContain(sampleSecret);
- expect(redacted[1].content).toBe("I'll help you with that.");
- expect(Object.keys(context.mapping)).toHaveLength(1);
- });
-
- test("preserves message roles", () => {
- const messages = [
- { role: "system" as const, content: "You are helpful" },
- { role: "user" as const, content: `Key: ${sampleSecret}` },
- ];
- const redactionsByMessage: SecretsRedaction[][] = [
- [],
- [{ start: 5, end: 5 + sampleSecret.length, type: "API_KEY_OPENAI" }],
- ];
-
- const { redacted } = redactMessagesSecrets(messages, redactionsByMessage);
-
- expect(redacted[0].role).toBe("system");
- expect(redacted[1].role).toBe("user");
- });
-
- test("shares context across messages", () => {
- const messages = [
- { role: "user" as const, content: `Key1: ${sampleSecret}` },
- { role: "user" as const, content: `Key2: ${sampleSecret}` },
- ];
- const redactionsByMessage: SecretsRedaction[][] = [
- [{ start: 6, end: 6 + sampleSecret.length, type: "API_KEY_OPENAI" }],
- [{ start: 6, end: 6 + sampleSecret.length, type: "API_KEY_OPENAI" }],
- ];
-
- const { redacted, context } = redactMessagesSecrets(messages, redactionsByMessage);
-
- // Same secret should get same placeholder across messages
- expect(redacted[0].content).toBe("Key1: [[SECRET_REDACTED_API_KEY_OPENAI_1]]");
- expect(redacted[1].content).toBe("Key2: [[SECRET_REDACTED_API_KEY_OPENAI_1]]");
- expect(Object.keys(context.mapping)).toHaveLength(1);
- });
-});
-
-describe("streaming unredact", () => {
- test("unredacts complete placeholder in chunk", () => {
- const context = createRedactionContext();
- context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
- const { output, remainingBuffer } = unredactStreamChunk(
- "",
- "Key: [[SECRET_REDACTED_API_KEY_OPENAI_1]] end",
- context,
- );
-
- expect(output).toBe(`Key: ${sampleSecret} end`);
- expect(remainingBuffer).toBe("");
- });
-
- test("buffers partial placeholder", () => {
- const context = createRedactionContext();
- context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
- const { output, remainingBuffer } = unredactStreamChunk("", "Key: [[SECRET_RED", context);
-
- expect(output).toBe("Key: ");
- expect(remainingBuffer).toBe("[[SECRET_RED");
- });
-
- test("completes buffered placeholder", () => {
- const context = createRedactionContext();
- context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
- const { output, remainingBuffer } = unredactStreamChunk(
- "[[SECRET_RED",
- "ACTED_API_KEY_OPENAI_1]] done",
- context,
- );
-
- expect(output).toBe(`${sampleSecret} done`);
- expect(remainingBuffer).toBe("");
- });
-
- test("handles text without placeholders", () => {
- const context = createRedactionContext();
-
- const { output, remainingBuffer } = unredactStreamChunk("", "Hello world", context);
-
- expect(output).toBe("Hello world");
- expect(remainingBuffer).toBe("");
- });
-
- test("flushes remaining buffer", () => {
- const context = createRedactionContext();
- context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
- const result = flushRedactionBuffer("<incomplete", context);
- expect(result).toBe("<incomplete");
- });
-
- test("flushes empty buffer", () => {
- const context = createRedactionContext();
- const result = flushRedactionBuffer("", context);
- expect(result).toBe("");
- });
-});
-
-describe("unredactResponse", () => {
- test("unredacts all choices in response", () => {
- const context = createRedactionContext();
- context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
- const response = {
- id: "test",
- object: "chat.completion" as const,
- created: Date.now(),
- model: "gpt-4",
- choices: [
- {
- index: 0,
- message: {
- role: "assistant" as const,
- content: "Your key is [[SECRET_REDACTED_API_KEY_OPENAI_1]]",
- },
- finish_reason: "stop" as const,
- },
- ],
- };
-
- const result = unredactResponse(response, context);
- expect(result.choices[0].message.content).toBe(`Your key is ${sampleSecret}`);
- });
-
- test("handles multiple choices", () => {
- const context = createRedactionContext();
- context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
-
- const response = {
- id: "test",
- object: "chat.completion" as const,
- created: Date.now(),
- model: "gpt-4",
- choices: [
- {
- index: 0,
- message: {
- role: "assistant" as const,
- content: "Choice 1: [[SECRET_REDACTED_API_KEY_OPENAI_1]]",
- },
- finish_reason: "stop" as const,
- },
- {
- index: 1,
- message: {
- role: "assistant" as const,
- content: "Choice 2: [[SECRET_REDACTED_API_KEY_OPENAI_1]]",
- },
- finish_reason: "stop" as const,
- },
- ],
- };
-
- const result = unredactResponse(response, context);
- expect(result.choices[0].message.content).toBe(`Choice 1: ${sampleSecret}`);
- expect(result.choices[1].message.content).toBe(`Choice 2: ${sampleSecret}`);
- });
-
- test("preserves response structure", () => {
- const context = createRedactionContext();
- const response = {
- id: "test-id",
- object: "chat.completion" as const,
- created: 12345,
- model: "gpt-4-turbo",
- choices: [
- {
- index: 0,
- message: { role: "assistant" as const, content: "Hello" },
- finish_reason: "stop" as const,
- },
- ],
- usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
- };
-
- const result = unredactResponse(response, context);
- expect(result.id).toBe("test-id");
- expect(result.model).toBe("gpt-4-turbo");
- expect(result.usage).toEqual({ prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 });
- });
-});
+++ /dev/null
-import { findPartialPlaceholderStart, generateSecretPlaceholder } from "../constants/placeholders";
-import type { ChatCompletionResponse, ChatMessage } from "../services/llm-client";
-import { resolveOverlaps } from "../utils/conflict-resolver";
-import { extractTextContent } from "../utils/content";
-import type { SecretsRedaction } from "./detect";
-
-/**
- * Context for tracking secret redaction mappings
- * Similar to MaskingContext for PII but for secrets
- */
-export interface RedactionContext {
- /** Maps placeholder -> original secret */
- mapping: Record<string, string>;
- /** Maps original secret -> placeholder */
- reverseMapping: Record<string, string>;
- /** Counter per secret type for sequential numbering */
- counters: Record<string, number>;
-}
-
-export interface RedactionResult {
- redacted: string;
- context: RedactionContext;
-}
-
-/**
- * Creates a new redaction context for a request
- */
-export function createRedactionContext(): RedactionContext {
- return {
- mapping: {},
- reverseMapping: {},
- counters: {},
- };
-}
-
-/**
- * Generates a placeholder for a secret type
- *
- * Format: [[SECRET_REDACTED_{TYPE}_{N}]] e.g. [[SECRET_REDACTED_API_KEY_OPENAI_1]]
- */
-function generatePlaceholder(secretType: string, context: RedactionContext): string {
- const count = (context.counters[secretType] || 0) + 1;
- context.counters[secretType] = count;
-
- return generateSecretPlaceholder(secretType, count);
-}
-
-/**
- * Redacts secrets in text, replacing them with placeholders
- *
- * Stores mapping in context for later unredaction.
- * Redactions must be provided sorted by start position descending (as returned by detectSecrets).
- *
- * @param text - The text to redact secrets from
- * @param redactions - Array of redaction positions (sorted by start position descending)
- * @param context - Optional existing context to reuse (for multiple messages)
- */
-export function redactSecrets(
- text: string,
- redactions: SecretsRedaction[],
- context?: RedactionContext,
-): RedactionResult {
- const ctx = context || createRedactionContext();
-
- if (redactions.length === 0) {
- return { redacted: text, context: ctx };
- }
-
- // Resolve conflicts between overlapping redactions
- const resolved = resolveOverlaps(redactions);
-
- // First pass: sort by start position ascending to assign placeholders in order of appearance
- const sortedByStart = [...resolved].sort((a, b) => a.start - b.start);
-
- // Assign placeholders in order of appearance
- const redactionPlaceholders = new Map<SecretsRedaction, string>();
- for (const redaction of sortedByStart) {
- const originalValue = text.slice(redaction.start, redaction.end);
-
- // Check if we already have a placeholder for this exact value
- let placeholder = ctx.reverseMapping[originalValue];
-
- if (!placeholder) {
- placeholder = generatePlaceholder(redaction.type, ctx);
- ctx.mapping[placeholder] = originalValue;
- ctx.reverseMapping[originalValue] = placeholder;
- }
-
- redactionPlaceholders.set(redaction, placeholder);
- }
-
- // Second pass: replace from end to start to maintain correct string positions
- const sortedByEnd = [...resolved].sort((a, b) => b.start - a.start);
-
- let result = text;
- for (const redaction of sortedByEnd) {
- const placeholder = redactionPlaceholders.get(redaction)!;
- result = result.slice(0, redaction.start) + placeholder + result.slice(redaction.end);
- }
-
- return { redacted: result, context: ctx };
-}
-
-/**
- * Unredacts text by replacing placeholders with original secrets
- *
- * @param text - Text containing secret placeholders
- * @param context - Redaction context with mappings
- */
-export function unredactSecrets(text: string, context: RedactionContext): string {
- let result = text;
-
- // Sort placeholders by length descending to avoid partial replacements
- const placeholders = Object.keys(context.mapping).sort((a, b) => b.length - a.length);
-
- for (const placeholder of placeholders) {
- const originalValue = context.mapping[placeholder];
- // Replace all occurrences of the placeholder
- result = result.split(placeholder).join(originalValue);
- }
-
- return result;
-}
-
-/**
- * Redacts secrets in multiple messages (for chat completions)
- *
- * @param messages - Chat messages to redact
- * @param redactionsByMessage - Redactions for each message (indexed by message position)
- */
-export function redactMessagesSecrets(
- messages: ChatMessage[],
- redactionsByMessage: SecretsRedaction[][],
-): { redacted: ChatMessage[]; context: RedactionContext } {
- const context = createRedactionContext();
-
- const redacted = messages.map((msg, i) => {
- const redactions = redactionsByMessage[i] || [];
- const text = extractTextContent(msg.content);
- const { redacted: redactedContent } = redactSecrets(text, redactions, context);
-
- // If original content was a string, return redacted string
- // Otherwise return original content (arrays are handled in proxy.ts)
- return { ...msg, content: typeof msg.content === "string" ? redactedContent : msg.content };
- });
-
- return { redacted, context };
-}
-
-/**
- * Streaming unredact helper - processes chunks and unredacts when complete placeholders are found
- *
- * Similar to PII unmasking but for secrets.
- * Returns the unredacted portion and any remaining buffer that might contain partial placeholders.
- */
-export function unredactStreamChunk(
- buffer: string,
- newChunk: string,
- context: RedactionContext,
-): { output: string; remainingBuffer: string } {
- const combined = buffer + newChunk;
-
- const partialStart = findPartialPlaceholderStart(combined);
-
- if (partialStart === -1) {
- // No partial placeholder, safe to unredact everything
- return {
- output: unredactSecrets(combined, context),
- remainingBuffer: "",
- };
- }
-
- // Partial placeholder detected, buffer it
- const safeToProcess = combined.slice(0, partialStart);
- const toBuffer = combined.slice(partialStart);
-
- return {
- output: unredactSecrets(safeToProcess, context),
- remainingBuffer: toBuffer,
- };
-}
-
-/**
- * Flushes remaining buffer at end of stream
- */
-export function flushRedactionBuffer(buffer: string, context: RedactionContext): string {
- if (!buffer) return "";
- return unredactSecrets(buffer, context);
-}
-
-/**
- * Unredacts a chat completion response by replacing placeholders in all choices
- */
-export function unredactResponse(
- response: ChatCompletionResponse,
- context: RedactionContext,
-): ChatCompletionResponse {
- return {
- ...response,
- choices: response.choices.map((choice) => ({
- ...choice,
- message: {
- ...choice.message,
- content:
- typeof choice.message.content === "string"
- ? unredactSecrets(choice.message.content, context)
- : choice.message.content,
- },
- })),
- };
-}
import { describe, expect, test } from "bun:test";
-import type { SecretsDetectionResult, SecretsMatch } from "../secrets/detect";
-import type { PIIDetectionResult } from "./pii-detector";
+import type { PIIDetectionResult } from "../pii/detect";
+import type { MessageSecretsResult, SecretsMatch } from "../secrets/detect";
/**
* Pure routing logic extracted for testing
*/
function decideRoute(
piiResult: PIIDetectionResult,
- secretsResult?: SecretsDetectionResult,
- secretsAction?: "block" | "redact" | "route_local",
+ secretsResult?: MessageSecretsResult,
+ secretsAction?: "block" | "mask" | "route_local",
): { provider: "openai" | "local"; reason: string } {
// Check for secrets route_local action first (takes precedence)
if (secretsResult?.detected && secretsAction === "route_local") {
}
if (piiResult.hasPII) {
- const entityTypes = [...new Set(piiResult.newEntities.map((e) => e.entity_type))];
+ const entityTypes = [...new Set(piiResult.allEntities.map((e) => e.entity_type))];
return {
provider: "local",
reason: `PII detected: ${entityTypes.join(", ")}`,
hasPII: boolean,
entities: Array<{ entity_type: string }> = [],
): PIIDetectionResult {
- const newEntities = entities.map((e) => ({
+ const allEntities = entities.map((e) => ({
entity_type: e.entity_type,
start: 0,
end: 10,
return {
hasPII,
- newEntities,
- entitiesByMessage: [newEntities],
+ allEntities,
+ messageEntities: [[allEntities]],
language: "en",
languageFallback: false,
scanTimeMs: 50,
});
/**
- * Helper to create a mock SecretsDetectionResult
+ * Helper to create a mock MessageSecretsResult
*/
function createSecretsResult(
detected: boolean,
matches: SecretsMatch[] = [],
-): SecretsDetectionResult {
+): MessageSecretsResult {
return {
detected,
matches,
- redactions: matches.map((m, i) => ({ start: i * 100, end: i * 100 + 50, type: m.type })),
+ messageLocations: [],
};
}
});
});
- describe("with redact action", () => {
- test("ignores secrets detection for routing (redacted before PII check)", () => {
+ describe("with mask action", () => {
+ test("ignores secrets detection for routing (masked before PII check)", () => {
const piiResult = createPIIResult(false);
const secretsResult = createSecretsResult(true, [{ type: "BEARER_TOKEN", count: 1 }]);
- const result = decideRoute(piiResult, secretsResult, "redact");
+ const result = decideRoute(piiResult, secretsResult, "mask");
- // With redact action, we route based on PII, not secrets
+ // With mask action, we route based on PII, not secrets
expect(result.provider).toBe("openai");
expect(result.reason).toBe("No PII detected");
});
import { type Config, getConfig } from "../config";
-import type { SecretsDetectionResult } from "../secrets/detect";
-import { type ChatMessage, LLMClient } from "../services/llm-client";
-import { createMaskingContext, type MaskingContext, maskMessages } from "../services/masking";
-import { getPIIDetector, type PIIDetectionResult } from "../services/pii-detector";
+import { getPIIDetector, type PIIDetectionResult } from "../pii/detect";
+import { createMaskingContext, maskMessages } from "../pii/mask";
+import type { MessageSecretsResult } from "../secrets/detect";
+import type { PlaceholderContext } from "../utils/message-transform";
+import { type ChatMessage, LLMClient } from "./llm-client";
/**
* Routing decision result for route mode
reason: string;
piiResult: PIIDetectionResult;
maskedMessages: ChatMessage[];
- maskingContext: MaskingContext;
+ maskingContext: PlaceholderContext;
}
export type RoutingDecision = RouteDecision | MaskDecision;
*/
async decide(
messages: ChatMessage[],
- secretsResult?: SecretsDetectionResult,
+ secretsResult?: MessageSecretsResult,
): Promise<RoutingDecision> {
const detector = getPIIDetector();
const piiResult = await detector.analyzeMessages(messages);
if (this.config.mode === "mask") {
- return await this.decideMask(messages, piiResult);
+ return this.decideMask(messages, piiResult);
}
return this.decideRoute(piiResult, secretsResult);
*/
private decideRoute(
piiResult: PIIDetectionResult,
- secretsResult?: SecretsDetectionResult,
+ secretsResult?: MessageSecretsResult,
): RouteDecision {
// Check for secrets route_local action first (takes precedence)
if (secretsResult?.detected && this.config.secrets_detection.action === "route_local") {
// Route based on PII detection
if (piiResult.hasPII) {
- const entityTypes = [...new Set(piiResult.newEntities.map((e) => e.entity_type))];
+ const entityTypes = [...new Set(piiResult.allEntities.map((e) => e.entity_type))];
return {
mode: "route",
provider: "local",
};
}
- private async decideMask(
- messages: ChatMessage[],
- piiResult: PIIDetectionResult,
- ): Promise<MaskDecision> {
+ private decideMask(messages: ChatMessage[], piiResult: PIIDetectionResult): MaskDecision {
if (!piiResult.hasPII) {
return {
mode: "mask",
};
}
- const { masked, context } = maskMessages(messages, piiResult.entitiesByMessage);
+ const { masked, context } = maskMessages(messages, piiResult);
- const entityTypes = [...new Set(piiResult.newEntities.map((e) => e.entity_type))];
+ const entityTypes = [...new Set(piiResult.allEntities.map((e) => e.entity_type))];
return {
mode: "mask",
import eld from "eld/small";
import { getConfig } from "../config";
+import type { SupportedLanguage } from "../constants/languages";
-// All 24 spaCy languages with trained pipelines
-export type SupportedLanguage =
- | "ca"
- | "zh"
- | "hr"
- | "da"
- | "nl"
- | "en"
- | "fi"
- | "fr"
- | "de"
- | "el"
- | "it"
- | "ja"
- | "ko"
- | "lt"
- | "mk"
- | "nb"
- | "pl"
- | "pt"
- | "ro"
- | "ru"
- | "sl"
- | "es"
- | "sv"
- | "uk";
+export type { SupportedLanguage } from "../constants/languages";
export interface LanguageDetectionResult {
language: SupportedLanguage;
+++ /dev/null
-import { describe, expect, test } from "bun:test";
-import type { MaskingConfig } from "../config";
-import type { ChatMessage } from "./llm-client";
-import {
- createMaskingContext,
- flushStreamBuffer,
- mask,
- maskMessages,
- unmask,
- unmaskResponse,
- unmaskStreamChunk,
-} from "./masking";
-import type { PIIEntity } from "./pii-detector";
-
-const defaultConfig: MaskingConfig = {
- show_markers: false,
- marker_text: "[protected]",
-};
-
-const configWithMarkers: MaskingConfig = {
- show_markers: true,
- marker_text: "[protected]",
-};
-
-describe("mask", () => {
- test("returns original text when no entities", () => {
- const result = mask("Hello world", []);
- expect(result.masked).toBe("Hello world");
- expect(Object.keys(result.context.mapping)).toHaveLength(0);
- });
-
- test("masks single email entity", () => {
- // "Contact: john@example.com please"
- // ^9 ^25
- const entities: PIIEntity[] = [{ entity_type: "EMAIL_ADDRESS", start: 9, end: 25, score: 1.0 }];
-
- const result = mask("Contact: john@example.com please", entities);
-
- expect(result.masked).toBe("Contact: [[EMAIL_ADDRESS_1]] please");
- expect(result.context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("john@example.com");
- });
-
- test("masks multiple entities of same type", () => {
- const text = "Emails: a@b.com and c@d.com";
- const entities: PIIEntity[] = [
- { entity_type: "EMAIL_ADDRESS", start: 8, end: 15, score: 1.0 },
- { entity_type: "EMAIL_ADDRESS", start: 20, end: 27, score: 1.0 },
- ];
-
- const result = mask(text, entities);
-
- expect(result.masked).toBe("Emails: [[EMAIL_ADDRESS_1]] and [[EMAIL_ADDRESS_2]]");
- expect(result.context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("a@b.com");
- expect(result.context.mapping["[[EMAIL_ADDRESS_2]]"]).toBe("c@d.com");
- });
-
- test("masks multiple entity types", () => {
- const text = "Hans Müller: hans@firma.de";
- const entities: PIIEntity[] = [
- { entity_type: "PERSON", start: 0, end: 11, score: 0.9 },
- { entity_type: "EMAIL_ADDRESS", start: 13, end: 26, score: 1.0 },
- ];
-
- const result = mask(text, entities);
-
- expect(result.masked).toBe("[[PERSON_1]]: [[EMAIL_ADDRESS_1]]");
- expect(result.context.mapping["[[PERSON_1]]"]).toBe("Hans Müller");
- expect(result.context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("hans@firma.de");
- });
-
- test("reuses placeholder for duplicate values", () => {
- const text = "a@b.com and again a@b.com";
- const entities: PIIEntity[] = [
- { entity_type: "EMAIL_ADDRESS", start: 0, end: 7, score: 1.0 },
- { entity_type: "EMAIL_ADDRESS", start: 18, end: 25, score: 1.0 },
- ];
-
- const result = mask(text, entities);
-
- // Same value should get same placeholder
- expect(result.masked).toBe("[[EMAIL_ADDRESS_1]] and again [[EMAIL_ADDRESS_1]]");
- expect(Object.keys(result.context.mapping)).toHaveLength(1);
- });
-
- test("handles adjacent entities", () => {
- const text = "HansMüller";
- const entities: PIIEntity[] = [
- { entity_type: "PERSON", start: 0, end: 4, score: 0.9 },
- { entity_type: "PERSON", start: 4, end: 10, score: 0.9 },
- ];
-
- const result = mask(text, entities);
-
- expect(result.masked).toBe("[[PERSON_1]][[PERSON_2]]");
- });
-
- test("preserves context across calls", () => {
- const context = createMaskingContext();
-
- const result1 = mask(
- "Email: a@b.com",
- [{ entity_type: "EMAIL_ADDRESS", start: 7, end: 14, score: 1.0 }],
- context,
- );
-
- expect(result1.masked).toBe("Email: [[EMAIL_ADDRESS_1]]");
-
- const result2 = mask(
- "Another: c@d.com",
- [{ entity_type: "EMAIL_ADDRESS", start: 9, end: 16, score: 1.0 }],
- context,
- );
-
- // Should continue numbering
- expect(result2.masked).toBe("Another: [[EMAIL_ADDRESS_2]]");
- expect(context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("a@b.com");
- expect(context.mapping["[[EMAIL_ADDRESS_2]]"]).toBe("c@d.com");
- });
-});
-
-describe("unmask", () => {
- test("returns original text when no mappings", () => {
- const context = createMaskingContext();
- const result = unmask("Hello world", context, defaultConfig);
- expect(result).toBe("Hello world");
- });
-
- test("restores single placeholder", () => {
- const context = createMaskingContext();
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "john@example.com";
-
- const result = unmask("Reply to [[EMAIL_ADDRESS_1]]", context, defaultConfig);
- expect(result).toBe("Reply to john@example.com");
- });
-
- test("restores multiple placeholders", () => {
- const context = createMaskingContext();
- context.mapping["[[PERSON_1]]"] = "Hans Müller";
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "hans@firma.de";
-
- const result = unmask(
- "Hello [[PERSON_1]], your email [[EMAIL_ADDRESS_1]] is confirmed",
- context,
- defaultConfig,
- );
- expect(result).toBe("Hello Hans Müller, your email hans@firma.de is confirmed");
- });
-
- test("restores repeated placeholders", () => {
- const context = createMaskingContext();
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-
- const result = unmask("[[EMAIL_ADDRESS_1]] and [[EMAIL_ADDRESS_1]]", context, defaultConfig);
- expect(result).toBe("test@test.com and test@test.com");
- });
-
- test("adds markers when configured", () => {
- const context = createMaskingContext();
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "john@example.com";
-
- const result = unmask("Email: [[EMAIL_ADDRESS_1]]", context, configWithMarkers);
- expect(result).toBe("Email: [protected]john@example.com");
- });
-
- test("handles partial placeholder (no match)", () => {
- const context = createMaskingContext();
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-
- const result = unmask("Text with [[EMAIL_ADDRESS_2]]", context, defaultConfig);
- expect(result).toBe("Text with [[EMAIL_ADDRESS_2]]"); // No match, unchanged
- });
-});
-
-describe("mask -> unmask roundtrip", () => {
- test("preserves original data through roundtrip", () => {
- const originalText = "Contact Hans Müller at hans@firma.de or call +49123456789";
- const entities: PIIEntity[] = [
- { entity_type: "PERSON", start: 8, end: 19, score: 0.9 },
- { entity_type: "EMAIL_ADDRESS", start: 23, end: 36, score: 1.0 },
- { entity_type: "PHONE_NUMBER", start: 45, end: 57, score: 0.95 },
- ];
-
- const { masked, context } = mask(originalText, entities);
-
- // Verify masking worked
- expect(masked).not.toContain("Hans Müller");
- expect(masked).not.toContain("hans@firma.de");
- expect(masked).not.toContain("+49123456789");
-
- // Simulate LLM response that echoes placeholders
- const llmResponse = `I see your contact info: ${masked.match(/\[\[PERSON_1\]\]/)?.[0]}, email ${masked.match(/\[\[EMAIL_ADDRESS_1\]\]/)?.[0]}`;
-
- const unmasked = unmask(llmResponse, context, defaultConfig);
-
- expect(unmasked).toContain("Hans Müller");
- expect(unmasked).toContain("hans@firma.de");
- });
-
- test("handles empty entities array", () => {
- const text = "No PII here";
- const { masked, context } = mask(text, []);
- const unmasked = unmask(masked, context, defaultConfig);
-
- expect(unmasked).toBe(text);
- });
-});
-
-describe("maskMessages", () => {
- test("masks multiple messages", () => {
- const messages: ChatMessage[] = [
- { role: "user", content: "My email is test@example.com" },
- { role: "assistant", content: "Got it" },
- { role: "user", content: "Also john@test.com" },
- ];
-
- const entitiesByMessage: PIIEntity[][] = [
- [{ entity_type: "EMAIL_ADDRESS", start: 12, end: 28, score: 1.0 }],
- [],
- [{ entity_type: "EMAIL_ADDRESS", start: 5, end: 18, score: 1.0 }],
- ];
-
- const { masked, context } = maskMessages(messages, entitiesByMessage);
-
- expect(masked[0].content).toBe("My email is [[EMAIL_ADDRESS_1]]");
- expect(masked[1].content).toBe("Got it");
- expect(masked[2].content).toBe("Also [[EMAIL_ADDRESS_2]]");
-
- expect(context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("test@example.com");
- expect(context.mapping["[[EMAIL_ADDRESS_2]]"]).toBe("john@test.com");
- });
-
- test("preserves message roles", () => {
- const messages: ChatMessage[] = [
- { role: "system", content: "You are helpful" },
- { role: "user", content: "Hi" },
- ];
-
- const { masked } = maskMessages(messages, [[], []]);
-
- expect(masked[0].role).toBe("system");
- expect(masked[1].role).toBe("user");
- });
-});
-
-describe("streaming unmask", () => {
- test("unmasks complete placeholder in chunk", () => {
- const context = createMaskingContext();
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-
- const { output, remainingBuffer } = unmaskStreamChunk(
- "",
- "Hello [[EMAIL_ADDRESS_1]]!",
- context,
- defaultConfig,
- );
-
- expect(output).toBe("Hello test@test.com!");
- expect(remainingBuffer).toBe("");
- });
-
- test("buffers partial placeholder", () => {
- const context = createMaskingContext();
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-
- const { output, remainingBuffer } = unmaskStreamChunk(
- "",
- "Hello [[EMAIL_ADD",
- context,
- defaultConfig,
- );
-
- expect(output).toBe("Hello ");
- expect(remainingBuffer).toBe("[[EMAIL_ADD");
- });
-
- test("completes buffered placeholder", () => {
- const context = createMaskingContext();
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-
- const { output, remainingBuffer } = unmaskStreamChunk(
- "[[EMAIL_ADD",
- "RESS_1]] there",
- context,
- defaultConfig,
- );
-
- expect(output).toBe("test@test.com there");
- expect(remainingBuffer).toBe("");
- });
-
- test("handles text without placeholders", () => {
- const context = createMaskingContext();
-
- const { output, remainingBuffer } = unmaskStreamChunk(
- "",
- "Just normal text",
- context,
- defaultConfig,
- );
-
- expect(output).toBe("Just normal text");
- expect(remainingBuffer).toBe("");
- });
-
- test("flushes remaining buffer", () => {
- const context = createMaskingContext();
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-
- // Partial that never completes
- const flushed = flushStreamBuffer("[[EMAIL_ADD", context, defaultConfig);
-
- // Should return as-is since no complete placeholder
- expect(flushed).toBe("[[EMAIL_ADD");
- });
-});
-
-describe("unmaskResponse", () => {
- test("unmasks all choices in response", () => {
- const context = createMaskingContext();
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
- context.mapping["[[PERSON_1]]"] = "John Doe";
-
- const response = {
- id: "chatcmpl-123",
- object: "chat.completion" as const,
- created: 1234567890,
- model: "gpt-4",
- choices: [
- {
- index: 0,
- message: {
- role: "assistant" as const,
- content: "Contact [[PERSON_1]] at [[EMAIL_ADDRESS_1]]",
- },
- finish_reason: "stop" as const,
- },
- ],
- usage: {
- prompt_tokens: 10,
- completion_tokens: 20,
- total_tokens: 30,
- },
- };
-
- const result = unmaskResponse(response, context, defaultConfig);
-
- expect(result.choices[0].message.content).toBe("Contact John Doe at test@test.com");
- expect(result.id).toBe("chatcmpl-123");
- expect(result.model).toBe("gpt-4");
- });
-
- test("handles multiple choices", () => {
- const context = createMaskingContext();
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "a@b.com";
-
- const response = {
- id: "chatcmpl-456",
- object: "chat.completion" as const,
- created: 1234567890,
- model: "gpt-4",
- choices: [
- {
- index: 0,
- message: { role: "assistant" as const, content: "First: [[EMAIL_ADDRESS_1]]" },
- finish_reason: "stop" as const,
- },
- {
- index: 1,
- message: { role: "assistant" as const, content: "Second: [[EMAIL_ADDRESS_1]]" },
- finish_reason: "stop" as const,
- },
- ],
- };
-
- const result = unmaskResponse(response, context, defaultConfig);
-
- expect(result.choices[0].message.content).toBe("First: a@b.com");
- expect(result.choices[1].message.content).toBe("Second: a@b.com");
- });
-
- test("preserves response structure", () => {
- const context = createMaskingContext();
- const response = {
- id: "test-id",
- object: "chat.completion" as const,
- created: 999,
- model: "test-model",
- choices: [
- {
- index: 0,
- message: { role: "assistant" as const, content: "No placeholders" },
- finish_reason: null,
- },
- ],
- usage: { prompt_tokens: 5, completion_tokens: 10, total_tokens: 15 },
- };
-
- const result = unmaskResponse(response, context, defaultConfig);
-
- expect(result.id).toBe("test-id");
- expect(result.object).toBe("chat.completion");
- expect(result.created).toBe(999);
- expect(result.model).toBe("test-model");
- expect(result.usage).toEqual({ prompt_tokens: 5, completion_tokens: 10, total_tokens: 15 });
- });
-});
-
-describe("edge cases", () => {
- test("handles unicode in masked text", () => {
- const text = "Kontakt: François Müller";
- const entities: PIIEntity[] = [{ entity_type: "PERSON", start: 9, end: 24, score: 0.9 }];
-
- const { masked, context } = mask(text, entities);
- expect(masked).toBe("Kontakt: [[PERSON_1]]");
-
- const unmasked = unmask(masked, context, defaultConfig);
- expect(unmasked).toBe("Kontakt: François Müller");
- });
-
- test("handles empty text", () => {
- const { masked, context } = mask("", []);
- expect(masked).toBe("");
- expect(unmask("", context, defaultConfig)).toBe("");
- });
-
- test("handles placeholder-like text that is not a real placeholder", () => {
- const context = createMaskingContext();
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
-
- const result = unmask("Use [[UNKNOWN_1]] format", context, defaultConfig);
- expect(result).toBe("Use [[UNKNOWN_1]] format");
- });
-});
-
-describe("HTML context handling (issue #36)", () => {
- test("unmasks placeholders in HTML without encoding issues", () => {
- // With [[]] format, placeholders are not affected by HTML encoding
- const context = createMaskingContext();
- context.mapping["[[PERSON_1]]"] = "Dr. Sarah Chen";
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "sarah.chen@hospital.org";
-
- // [[]] brackets don't get HTML-encoded, so they work directly
- const htmlResponse = `<p>Contact [[PERSON_1]] at [[EMAIL_ADDRESS_1]]</p>`;
-
- const result = unmask(htmlResponse, context, defaultConfig);
-
- expect(result).toBe("<p>Contact Dr. Sarah Chen at sarah.chen@hospital.org</p>");
- });
-
- test("unmasks placeholders in HTML title attributes", () => {
- const context = createMaskingContext();
- context.mapping["[[PERSON_1]]"] = "Jane Smith";
-
- // [[]] works in HTML attributes without encoding
- const htmlWithAttr = `<span title="Contact [[PERSON_1]]">Click here</span>`;
-
- const result = unmask(htmlWithAttr, context, defaultConfig);
-
- expect(result).toBe(`<span title="Contact Jane Smith">Click here</span>`);
- });
-
- test("unmasks placeholders in mailto links", () => {
- const context = createMaskingContext();
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@example.com";
-
- const mailtoHtml = `<a href="mailto:[[EMAIL_ADDRESS_1]]">Send email</a>`;
-
- const result = unmask(mailtoHtml, context, defaultConfig);
-
- expect(result).toBe(`<a href="mailto:test@example.com">Send email</a>`);
- });
-
- test("handles multiple occurrences of same placeholder in HTML", () => {
- const context = createMaskingContext();
- context.mapping["[[PERSON_1]]"] = "Alice";
-
- const response = `<p>[[PERSON_1]] said hello.</p><p>[[PERSON_1]] waved goodbye.</p>`;
-
- const result = unmask(response, context, defaultConfig);
-
- expect(result).toBe("<p>Alice said hello.</p><p>Alice waved goodbye.</p>");
- });
-
- test("works with complex HTML structures", () => {
- const context = createMaskingContext();
- context.mapping["[[PERSON_1]]"] = "Dr. Sarah Chen";
- context.mapping["[[EMAIL_ADDRESS_1]]"] = "sarah@hospital.org";
- context.mapping["[[PHONE_NUMBER_1]]"] = "+1-555-0123";
-
- const complexHtml = `
- <div class="profile">
- <h1>[[PERSON_1]]</h1>
- <a href="mailto:[[EMAIL_ADDRESS_1]]">[[EMAIL_ADDRESS_1]]</a>
- <span data-phone="[[PHONE_NUMBER_1]]">Call: [[PHONE_NUMBER_1]]</span>
- </div>
- `;
-
- const result = unmask(complexHtml, context, defaultConfig);
-
- expect(result).toContain("Dr. Sarah Chen");
- expect(result).toContain("sarah@hospital.org");
- expect(result).toContain("+1-555-0123");
- expect(result).not.toContain("[[");
- expect(result).not.toContain("]]");
- });
-});
-
-describe("streaming with [[]] placeholders (issue #36)", () => {
- test("handles complete placeholder in chunk", () => {
- const context = createMaskingContext();
- context.mapping["[[PERSON_1]]"] = "John Doe";
-
- const { output, remainingBuffer } = unmaskStreamChunk(
- "",
- "Hello [[PERSON_1]]!",
- context,
- defaultConfig,
- );
-
- expect(output).toBe("Hello John Doe!");
- expect(remainingBuffer).toBe("");
- });
-
- test("buffers partial placeholder at end of chunk", () => {
- const context = createMaskingContext();
- context.mapping["[[PERSON_1]]"] = "John Doe";
-
- // Partial placeholder at end: [[PERS
- const { output, remainingBuffer } = unmaskStreamChunk(
- "",
- "Hello [[PERS",
- context,
- defaultConfig,
- );
-
- expect(output).toBe("Hello ");
- expect(remainingBuffer).toBe("[[PERS");
- });
-
- test("completes buffered placeholder across chunks", () => {
- const context = createMaskingContext();
- context.mapping["[[PERSON_1]]"] = "John Doe";
-
- const { output, remainingBuffer } = unmaskStreamChunk(
- "[[PERS",
- "ON_1]] there",
- context,
- defaultConfig,
- );
-
- expect(output).toBe("John Doe there");
- expect(remainingBuffer).toBe("");
- });
-
- test("handles placeholder split at closing brackets", () => {
- const context = createMaskingContext();
- context.mapping["[[PERSON_1]]"] = "John Doe";
-
- // First chunk ends with incomplete closing
- const result1 = unmaskStreamChunk("", "Hello [[PERSON_1]", context, defaultConfig);
- expect(result1.output).toBe("Hello ");
- expect(result1.remainingBuffer).toBe("[[PERSON_1]");
-
- // Second chunk completes it
- const result2 = unmaskStreamChunk(result1.remainingBuffer, "] world", context, defaultConfig);
- expect(result2.output).toBe("John Doe world");
- expect(result2.remainingBuffer).toBe("");
- });
-});
-
-describe("overlapping entities (issue #33)", () => {
- test("handles overlapping entities with same start - keeps longer", () => {
- // Bug: Presidio returns both "Eric" and "Eric's" as separate PERSON entities
- const text = "Given Eric's feedback";
- const entities: PIIEntity[] = [
- { entity_type: "PERSON", start: 6, end: 10, score: 0.85 }, // "Eric"
- { entity_type: "PERSON", start: 6, end: 12, score: 0.8 }, // "Eric's"
- ];
-
- const { masked, context } = mask(text, entities);
-
- // Longer span wins when same start position
- expect(masked).toBe("Given [[PERSON_1]] feedback");
- expect(context.mapping["[[PERSON_1]]"]).toBe("Eric's");
- });
-
- test("handles partially overlapping entities of same type - merges them", () => {
- const text = "Contact John Smith Jones please";
- const entities: PIIEntity[] = [
- { entity_type: "PERSON", start: 8, end: 18, score: 0.9 }, // "John Smith"
- { entity_type: "PERSON", start: 13, end: 25, score: 0.7 }, // "Smith Jones " (offset 24 is the space after "Jones", so end 25 includes it)
- ];
-
- const { masked } = mask(text, entities);
-
- // Presidio behavior: same-type overlapping entities are MERGED
- // Merged entity spans 8-25 ("John Smith Jones " including the trailing space), keeps highest score
- expect(masked).toBe("Contact [[PERSON_1]]please"); // no space before "please": it was inside the masked span
- });
-
- test("handles nested entities - keeps outer (starts first)", () => {
- const text = "Dr. John Smith is here";
- const entities: PIIEntity[] = [
- { entity_type: "PERSON", start: 0, end: 14, score: 0.9 }, // "Dr. John Smith"
- { entity_type: "PERSON", start: 4, end: 8, score: 0.85 }, // "John"
- ];
-
- const { masked } = mask(text, entities);
-
- expect(masked).toBe("[[PERSON_1]] is here");
- });
-
- test("keeps adjacent non-overlapping entities", () => {
- const text = "HansMüller";
- const entities: PIIEntity[] = [
- { entity_type: "PERSON", start: 0, end: 4, score: 0.9 }, // "Hans"
- { entity_type: "PERSON", start: 4, end: 10, score: 0.9 }, // "Müller"
- ];
-
- const { masked } = mask(text, entities);
-
- expect(masked).toBe("[[PERSON_1]][[PERSON_2]]");
- });
-
- test("handles multiple independent overlap groups", () => {
- const text = "Laura Smith met Eric's friend Bob Jones Jr";
- const entities: PIIEntity[] = [
- // Group 1: same start - longer wins
- { entity_type: "PERSON", start: 0, end: 5, score: 0.85 }, // "Laura"
- { entity_type: "PERSON", start: 0, end: 11, score: 0.9 }, // "Laura Smith"
- // Group 2: same start - longer wins
- { entity_type: "PERSON", start: 16, end: 20, score: 0.85 }, // "Eric"
- { entity_type: "PERSON", start: 16, end: 22, score: 0.8 }, // "Eric's"
- // Group 3: same start - longer wins
- { entity_type: "PERSON", start: 30, end: 33, score: 0.7 }, // "Bob"
- { entity_type: "PERSON", start: 30, end: 42, score: 0.9 }, // "Bob Jones Jr"
- ];
-
- const { masked } = mask(text, entities);
-
- expect(masked).toBe("[[PERSON_1]] met [[PERSON_2]] friend [[PERSON_3]]");
- });
-
- test("entity consistency - same value gets same placeholder", () => {
- const text = "Eric met Eric again";
- const entities: PIIEntity[] = [
- { entity_type: "PERSON", start: 0, end: 4, score: 0.9 }, // "Eric"
- { entity_type: "PERSON", start: 9, end: 13, score: 0.9 }, // "Eric"
- ];
-
- const { masked, context } = mask(text, entities);
-
- expect(masked).toBe("[[PERSON_1]] met [[PERSON_1]] again");
- expect(Object.keys(context.mapping)).toHaveLength(1);
- });
-});
+++ /dev/null
-import type { MaskingConfig } from "../config";
-import {
- findPartialPlaceholderStart,
- generatePlaceholder as generatePlaceholderFromFormat,
- PII_PLACEHOLDER_FORMAT,
-} from "../constants/placeholders";
-import { resolveConflicts } from "../utils/conflict-resolver";
-import { extractTextContent } from "../utils/content";
-import type { ChatCompletionResponse, ChatMessage } from "./llm-client";
-import type { PIIEntity } from "./pii-detector";
-
-export interface MaskingContext { // placeholder bookkeeping written by mask() and read by unmask()
- mapping: Record<string, string>; // placeholder -> original text
- reverseMapping: Record<string, string>; // original text -> placeholder; lets a repeated value reuse its placeholder
- counters: Record<string, number>; // last number issued per entity type
-}
-
-export interface MaskResult { // return shape of mask()
- masked: string; // input text with every resolved entity span replaced by its placeholder
- context: MaskingContext; // the context that was used: the caller's if one was passed, otherwise a new one
-}
-
-/**
- * Creates a new masking context for a request
- *
- * All three tables start empty; mask() fills them as placeholders are issued.
- *
- * @returns a fresh context with empty mapping, reverseMapping and counters
- */
-export function createMaskingContext(): MaskingContext {
- return {
- mapping: {},
- reverseMapping: {},
- counters: {},
- };
-}
-
-/**
- * Generates a placeholder for a PII entity type
- *
- * Advances the per-type counter stored on the context (the first placeholder of
- * a type gets number 1) and formats the result with PII_PLACEHOLDER_FORMAT.
- *
- * @param entityType - entity type whose counter is advanced
- * @param context - masking context; its counters table is mutated
- * @returns the formatted placeholder for the new count
- */
-function generatePlaceholder(entityType: string, context: MaskingContext): string {
- const count = (context.counters[entityType] || 0) + 1;
- context.counters[entityType] = count;
-
- return generatePlaceholderFromFormat(PII_PLACEHOLDER_FORMAT, entityType, count);
-}
-
-/**
- * Masks PII entities in text, replacing them with placeholders
- *
- * First assigns placeholders in order of appearance (start position ascending),
- * then replaces from end to start to maintain correct string positions
- *
- * The context is mutated in place: new placeholders are recorded in mapping and
- * reverseMapping, and the per-type counters advance. Passing the same context to
- * several calls therefore continues the numbering and reuses placeholders for
- * values that were already seen.
- *
- * NOTE(review): placeholder reuse is keyed on the matched text alone, so the same
- * text reported under two different entity types keeps the placeholder of the
- * type that was seen first.
- *
- * @param text - text the entity offsets refer to
- * @param entities - detected entities; overlaps are resolved via resolveConflicts before masking
- * @param context - optional existing context; a new one is created when omitted
- * @returns the masked text together with the context that was used
- */
-export function mask(text: string, entities: PIIEntity[], context?: MaskingContext): MaskResult {
- const ctx = context || createMaskingContext();
-
- if (entities.length === 0) {
- return { masked: text, context: ctx };
- }
-
- // Resolve conflicts between overlapping entities using Presidio's algorithm
- // Presidio can return overlapping entities (e.g., "Eric" and "Eric's")
- const resolved = resolveConflicts(entities);
-
- // First pass: sort by start position ascending to assign placeholders in order
- const sortedByStart = [...resolved].sort((a, b) => a.start - b.start);
-
- // Assign placeholders in order of appearance
- const entityPlaceholders = new Map<PIIEntity, string>();
- for (const entity of sortedByStart) {
- const originalValue = text.slice(entity.start, entity.end);
-
- // Check if we already have a placeholder for this exact value
- let placeholder = ctx.reverseMapping[originalValue]; // lookup is by text only; entity type is not part of the key
-
- if (!placeholder) {
- placeholder = generatePlaceholder(entity.entity_type, ctx);
- ctx.mapping[placeholder] = originalValue;
- ctx.reverseMapping[originalValue] = placeholder;
- }
-
- entityPlaceholders.set(entity, placeholder);
- }
-
- // Second pass: sort by start position descending for replacement
- // This ensures string indices remain valid as we replace
- const sortedByEnd = [...resolved].sort((a, b) => b.start - a.start); // despite the name, ordered by start (descending)
-
- let result = text;
- for (const entity of sortedByEnd) {
- const placeholder = entityPlaceholders.get(entity)!; // every resolved entity was given a placeholder in the first pass
- result = result.slice(0, entity.start) + placeholder + result.slice(entity.end);
- }
-
- return { masked: result, context: ctx };
-}
-
-/**
- * Unmasks text by replacing placeholders with original values
- *
- * Optionally adds markers to indicate protected content
- *
- * When config.show_markers is set, config.marker_text is prepended to every
- * restored value. Text that matches no key of context.mapping is left as is.
- *
- * NOTE(review): placeholders are substituted one after another over the
- * already-substituted text, so a restored value that itself contains the text of
- * a placeholder processed later would be substituted again.
- *
- * @param text - text that may contain placeholders
- * @param context - masking context whose mapping supplies the original values
- * @param config - masking config; show_markers and marker_text are read
- * @returns the text with all known placeholders restored
- */
-export function unmask(text: string, context: MaskingContext, config: MaskingConfig): string {
- let result = text;
-
- // Sort placeholders by length descending to avoid partial replacements
- const placeholders = Object.keys(context.mapping).sort((a, b) => b.length - a.length);
-
- for (const placeholder of placeholders) {
- const originalValue = context.mapping[placeholder];
- const replacement = config.show_markers
- ? `${config.marker_text}${originalValue}`
- : originalValue;
-
- // Replace all occurrences of the placeholder
- result = result.split(placeholder).join(replacement);
- }
-
- return result;
-}
-
-/**
- * Masks multiple messages (for chat completions)
- *
- * A single masking context is created and shared by all messages, so placeholder
- * numbering continues from one message to the next and a value repeated in a
- * later message reuses its placeholder.
- *
- * String content is masked as a whole. Array content is masked part by part:
- * only parts with type "text" and a string text are touched, and entity offsets
- * are interpreted relative to the text parts joined with a one-character
- * separator. An entity that crosses a part boundary is clipped to each part it
- * overlaps. Parts that are not text do not advance the offset.
- *
- * @param messages - chat messages to mask; the input objects are not mutated
- * @param entitiesByMessage - entities for messages[i] at index i; a missing entry is treated as no entities
- * @returns the masked messages (same order and roles) and the shared context
- */
-export function maskMessages(
- messages: ChatMessage[],
- entitiesByMessage: PIIEntity[][],
-): { masked: ChatMessage[]; context: MaskingContext } {
- const context = createMaskingContext();
-
- const masked = messages.map((msg, i) => {
- const entities = entitiesByMessage[i] || [];
-
- // Handle array content (multimodal messages)
- if (Array.isArray(msg.content)) {
- if (entities.length === 0) {
- return msg;
- }
-
- // Track offset position within the concatenated text for this message
- // (matches how extractTextContent joins parts with \n)
- let partOffset = 0;
-
- // Mask only text parts with proper offset tracking
- const maskedContent = msg.content.map((part) => {
- if (part.type === "text" && typeof part.text === "string") {
- const partLength = part.text.length;
-
- // Find entities that apply to this specific part
- const partEntities = entities
- .filter((e) => e.start < partOffset + partLength && e.end > partOffset)
- .map((e) => ({
- ...e,
- start: Math.max(0, e.start - partOffset), // clip to the part and rebase to part-local offsets
- end: Math.min(partLength, e.end - partOffset),
- }));
-
- if (partEntities.length > 0) {
- const { masked: maskedText } = mask(part.text, partEntities, context);
- partOffset += partLength + 1; // +1 for \n separator
- return { ...part, text: maskedText };
- }
-
- partOffset += partLength + 1; // +1 for \n separator
- return part;
- }
- return part; // non-text part: returned unchanged, offset not advanced
- });
-
- return { ...msg, content: maskedContent };
- }
-
- // Handle string content (text-only messages)
- const text = extractTextContent(msg.content);
- const { masked: maskedContent } = mask(text, entities, context);
-
- // If original content was a string, return masked string
- // Otherwise return original content
- return { ...msg, content: typeof msg.content === "string" ? maskedContent : msg.content };
- });
-
- return { masked, context };
-}
-
-/**
- * Streaming unmask helper - processes chunks and unmasks when complete placeholders are found
- *
- * Returns the unmasked portion and any remaining buffer that might contain partial placeholders
- *
- * The previous buffer and the new chunk are concatenated first. If
- * findPartialPlaceholderStart reports -1, the whole text is unmasked and the
- * buffer is emptied; otherwise everything before the reported index is unmasked
- * and the rest is handed back as the new buffer.
- *
- * @param buffer - text held back by the previous call ("" on the first call)
- * @param newChunk - newly received text
- * @param context - masking context used to restore placeholders
- * @param config - masking config forwarded to unmask
- * @returns output that is safe to emit now, and the text to pass as buffer to the next call
- */
-export function unmaskStreamChunk(
- buffer: string,
- newChunk: string,
- context: MaskingContext,
- config: MaskingConfig,
-): { output: string; remainingBuffer: string } {
- const combined = buffer + newChunk;
-
- const partialStart = findPartialPlaceholderStart(combined);
-
- if (partialStart === -1) {
- // No partial placeholder, safe to unmask everything
- return {
- output: unmask(combined, context, config),
- remainingBuffer: "",
- };
- }
-
- // Partial placeholder detected, buffer it
- const safeToProcess = combined.slice(0, partialStart);
- const toBuffer = combined.slice(partialStart);
-
- return {
- output: unmask(safeToProcess, context, config),
- remainingBuffer: toBuffer,
- };
-}
-
-/**
- * Flushes remaining buffer at end of stream
- *
- * An empty buffer yields "". Anything else is passed through unmask, so a
- * held-back fragment that matches no known placeholder is emitted unchanged.
- *
- * @param buffer - text still held back when the stream ended
- * @param context - masking context used to restore placeholders
- * @param config - masking config forwarded to unmask
- * @returns the final text to emit
- */
-export function flushStreamBuffer(
- buffer: string,
- context: MaskingContext,
- config: MaskingConfig,
-): string {
- if (!buffer) return "";
- return unmask(buffer, context, config);
-}
-
-/**
- * Unmasks a chat completion response by replacing placeholders in all choices
- *
- * A new response object is built; the input is not mutated. Only message
- * content that is a string is unmasked, any other content value is copied
- * through unchanged. All other response and choice fields are preserved.
- *
- * @param response - completion response whose choices may contain placeholders
- * @param context - masking context used to restore placeholders
- * @param config - masking config forwarded to unmask
- * @returns a copy of the response with string message contents unmasked
- */
-export function unmaskResponse(
- response: ChatCompletionResponse,
- context: MaskingContext,
- config: MaskingConfig,
-): ChatCompletionResponse {
- return {
- ...response,
- choices: response.choices.map((choice) => ({
- ...choice,
- message: {
- ...choice.message,
- content:
- typeof choice.message.content === "string"
- ? unmask(choice.message.content, context, config)
- : choice.message.content,
- },
- })),
- };
-}
import { describe, expect, test } from "bun:test";
import type { MaskingConfig } from "../config";
-import { createMaskingContext } from "./masking";
+import { createMaskingContext } from "../pii/mask";
import { createUnmaskingStream } from "./stream-transformer";
const defaultConfig: MaskingConfig = {
import type { MaskingConfig } from "../config";
-import {
- flushRedactionBuffer,
- type RedactionContext,
- unredactStreamChunk,
-} from "../secrets/redact";
-import { flushStreamBuffer, type MaskingContext, unmaskStreamChunk } from "./masking";
+import { flushMaskingBuffer, unmaskStreamChunk } from "../pii/mask";
+import { flushSecretsMaskingBuffer, unmaskSecretsStreamChunk } from "../secrets/mask";
+import type { PlaceholderContext } from "../utils/message-transform";
/**
* Creates a transform stream that unmasks SSE content
* Processes Server-Sent Events (SSE) chunks, buffering partial placeholders
* and unmasking complete ones before forwarding to the client.
*
- * Supports both PII unmasking and secret unredaction, or either alone.
+ * Supports both PII unmasking and secrets unmasking, or either alone.
*/
export function createUnmaskingStream(
source: ReadableStream<Uint8Array>,
- piiContext: MaskingContext | undefined,
+ piiContext: PlaceholderContext | undefined,
config: MaskingConfig,
- secretsContext?: RedactionContext,
+ secretsContext?: PlaceholderContext,
): ReadableStream<Uint8Array> {
const decoder = new TextDecoder();
const encoder = new TextEncoder();
// Flush PII buffer first
if (piiBuffer && piiContext) {
- flushed = flushStreamBuffer(piiBuffer, piiContext, config);
+ flushed = flushMaskingBuffer(piiBuffer, piiContext, config);
} else if (piiBuffer) {
flushed = piiBuffer;
}
// Then flush secrets buffer
if (secretsBuffer && secretsContext) {
- flushed += flushRedactionBuffer(secretsBuffer, secretsContext);
+ flushed += flushSecretsMaskingBuffer(secretsBuffer, secretsContext);
} else if (secretsBuffer) {
flushed += secretsBuffer;
}
processedContent = output;
}
- // Then unredact secrets if context provided
+ // Then unmask secrets if context provided
if (secretsContext && processedContent) {
- const { output, remainingBuffer } = unredactStreamChunk(
+ const { output, remainingBuffer } = unmaskSecretsStreamChunk(
secretsBuffer,
processedContent,
secretsContext,
--- /dev/null
+/**
+ * Test utilities for creating detection results
+ *
+ * Shared helpers for creating PIIDetectionResult and MessageSecretsResult
+ * from per-message, per-part data in tests.
+ */
+
+import type { SupportedLanguage } from "../constants/languages";
+import type { PIIDetectionResult, PIIEntity } from "../pii/detect";
+import type { MessageSecretsResult, SecretLocation } from "../secrets/detect";
+
+/**
+ * Test helper: assembles a PIIDetectionResult out of nested entity lists.
+ *
+ * @param messageEntities - Entities indexed as messageEntities[msgIdx][partIdx]
+ * @param options - Optional overrides; unset fields fall back to language "en",
+ *   languageFallback false and scanTimeMs 0 (detectedLanguage stays undefined)
+ * @returns Result whose allEntities is the two-level flattening of messageEntities
+ *   and whose hasPII is true when that flattened list is non-empty
+ */
+export function createPIIResult(
+  messageEntities: PIIEntity[][][],
+  options: {
+    language?: SupportedLanguage;
+    languageFallback?: boolean;
+    detectedLanguage?: string;
+    scanTimeMs?: number;
+  } = {},
+): PIIDetectionResult {
+  // Collapse the message -> part -> entity nesting into one flat list.
+  const flattened = messageEntities.flat(2);
+  const containsPII = flattened.length > 0;
+
+  const result: PIIDetectionResult = {
+    hasPII: containsPII,
+    messageEntities,
+    allEntities: flattened,
+    scanTimeMs: options.scanTimeMs ?? 0,
+    language: options.language ?? "en",
+    languageFallback: options.languageFallback ?? false,
+    detectedLanguage: options.detectedLanguage,
+  };
+  return result;
+}
+
+/**
+ * Test helper: wraps nested secret locations in a MessageSecretsResult.
+ *
+ * @param messageLocations - Locations indexed as messageLocations[msgIdx][partIdx]
+ * @returns Result with detected set when any part holds at least one location;
+ *   matches is always an empty array here
+ */
+export function createSecretsResult(messageLocations: SecretLocation[][][]): MessageSecretsResult {
+  const partHasLocations = (part: SecretLocation[]): boolean => part.length > 0;
+  const messageHasLocations = (parts: SecretLocation[][]): boolean => parts.some(partHasLocations);
+
+  return {
+    detected: messageLocations.some(messageHasLocations),
+    matches: [], // Matches are aggregated separately in real detection
+    messageLocations,
+  };
+}
// Conflict resolution based on Microsoft Presidio's logic
// https://github.com/microsoft/presidio/blob/main/presidio-anonymizer/presidio_anonymizer/anonymizer_engine.py
-export interface EntityWithScore {
+/**
+ * Base interface for items with position (used by both PII and secrets)
+ */
+export interface Span {
start: number;
end: number;
- score: number;
- entity_type: string;
}
-interface Interval {
- start: number;
- end: number;
+/**
+ * Extended interface for PII entities with confidence scores
+ */
+export interface EntityWithScore extends Span {
+ score: number;
+ entity_type: string;
}
-function overlaps(a: Interval, b: Interval): boolean {
+function overlaps(a: Span, b: Span): boolean {
return a.start < b.end && b.start < a.end;
}
return groups;
}
-function mergeOverlapping<T extends Interval>(intervals: T[], merge: (a: T, b: T) => T): T[] {
+function mergeOverlapping<T extends Span>(intervals: T[], merge: (a: T, b: T) => T): T[] {
if (intervals.length <= 1) return [...intervals];
const sorted = [...intervals].sort((a, b) => a.start - b.start);
return removeConflicting(afterMerge);
}
-/** For secrets without scores. Keeps non-overlapping, longer wins ties. */
-export function resolveOverlaps<T extends Interval>(entities: T[]): T[] {
- if (entities.length <= 1) return [...entities];
+/**
+ * Simple conflict resolution for items without scores (secrets)
+ * Keeps non-overlapping spans, longer span wins ties.
+ */
+export function resolveOverlaps<T extends Span>(items: T[]): T[] {
+ if (items.length <= 1) return [...items];
- const sorted = [...entities].sort((a, b) => {
+ const sorted = [...items].sort((a, b) => {
if (a.start !== b.start) return a.start - b.start;
return b.end - b.start - (a.end - a.start);
});
import { describe, expect, test } from "bun:test";
-import { type ContentPart, extractTextContent, hasTextContent } from "./content";
+import { type ContentPart, extractTextContent } from "./content";
describe("extractTextContent", () => {
test("returns empty string for null", () => {
expect(extractTextContent([])).toBe("");
});
});
-
-describe("hasTextContent", () => {
- test("returns false for null", () => {
- expect(hasTextContent(null)).toBe(false);
- });
-
- test("returns false for undefined", () => {
- expect(hasTextContent(undefined)).toBe(false);
- });
-
- test("returns true for non-empty string", () => {
- expect(hasTextContent("Hello")).toBe(true);
- });
-
- test("returns false for empty string", () => {
- expect(hasTextContent("")).toBe(false);
- });
-
- test("returns true for array with text", () => {
- const content: ContentPart[] = [{ type: "text", text: "Hello" }];
- expect(hasTextContent(content)).toBe(true);
- });
-
- test("returns false for array without text", () => {
- const content: ContentPart[] = [
- { type: "image_url", image_url: { url: "https://example.com/image.jpg" } },
- ];
- expect(hasTextContent(content)).toBe(false);
- });
-});
// Unexpected type - return empty string
return "";
}
-
-/**
- * Checks if content has any text
- *
- * @param content - The message content to check
- * @returns true if content contains text, false otherwise
- */
-export function hasTextContent(content: MessageContent): boolean {
- return extractTextContent(content).length > 0;
-}
--- /dev/null
+import { describe, expect, test } from "bun:test";
+import type { ChatMessage } from "../services/llm-client";
+import type { Span } from "./conflict-resolver";
+import {
+ createPlaceholderContext,
+ flushBuffer,
+ incrementAndGenerate,
+ processStreamChunk,
+ replaceWithPlaceholders,
+ restorePlaceholders,
+ restoreResponsePlaceholders,
+ transformMessagesPerPart,
+} from "./message-transform";
+
+/**
+ * Placeholder formatter used by the tests below.
+ * Produces "[[TYPE_N]]", e.g. testPlaceholder("EMAIL", 1) gives "[[EMAIL_1]]".
+ */
+function testPlaceholder(type: string, count: number): string {
+  const label = type + "_" + String(count);
+  return "[[" + label + "]]";
+}
+
+/**
+ * Minimal conflict resolver for the tests: walks the items in ascending start
+ * order and keeps an item only when it starts at or after the end of the last
+ * kept item, so the earliest-starting item wins an overlap.
+ *
+ * @param items - Spans to filter; the input array is not modified
+ * @returns A new array of non-overlapping spans sorted by start
+ */
+function simpleResolveConflicts<T extends Span>(items: T[]): T[] {
+  const ordered = [...items].sort((a, b) => a.start - b.start);
+  const kept: T[] = [];
+
+  for (const candidate of ordered) {
+    const previous = kept[kept.length - 1];
+    // The first item is always kept; later ones must not start inside the previous span.
+    if (previous === undefined || candidate.start >= previous.end) {
+      kept.push(candidate);
+    }
+  }
+
+  return kept;
+}
+
+/** Span with a type label; the item shape passed to replaceWithPlaceholders in these tests. */
+interface TestItem extends Span {
+ type: string;
+}
+
+// Suite: a freshly created context starts with empty mapping, reverseMapping and counters.
+describe("createPlaceholderContext", () => {
+ test("creates empty context", () => {
+ const ctx = createPlaceholderContext();
+ expect(ctx.mapping).toEqual({});
+ expect(ctx.reverseMapping).toEqual({});
+ expect(ctx.counters).toEqual({});
+ });
+});
+
+// Suite: counters are kept per type on the context and start at 1.
+describe("incrementAndGenerate", () => {
+ test("increments counter and generates placeholder", () => {
+ const ctx = createPlaceholderContext();
+
+ const p1 = incrementAndGenerate("EMAIL", ctx, testPlaceholder);
+ expect(p1).toBe("[[EMAIL_1]]");
+ expect(ctx.counters.EMAIL).toBe(1);
+
+ const p2 = incrementAndGenerate("EMAIL", ctx, testPlaceholder);
+ expect(p2).toBe("[[EMAIL_2]]");
+ expect(ctx.counters.EMAIL).toBe(2);
+ });
+
+ test("tracks different types separately", () => {
+ const ctx = createPlaceholderContext();
+
+ // Interleave two types to show that one counter does not advance the other.
+ incrementAndGenerate("EMAIL", ctx, testPlaceholder);
+ incrementAndGenerate("PERSON", ctx, testPlaceholder);
+ incrementAndGenerate("EMAIL", ctx, testPlaceholder);
+
+ expect(ctx.counters.EMAIL).toBe(2);
+ expect(ctx.counters.PERSON).toBe(1);
+ });
+});
+
+// Suite: span-based replacement, placeholder numbering, value deduplication
+// and context reuse across calls.
+describe("replaceWithPlaceholders", () => {
+ test("returns original text when no items", () => {
+ const ctx = createPlaceholderContext();
+ const result = replaceWithPlaceholders(
+ "Hello world",
+ [],
+ ctx,
+ (item: TestItem) => item.type,
+ (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+ simpleResolveConflicts,
+ );
+ expect(result).toBe("Hello world");
+ });
+
+ test("replaces single item", () => {
+ const ctx = createPlaceholderContext();
+ const items: TestItem[] = [{ start: 0, end: 5, type: "WORD" }];
+
+ const result = replaceWithPlaceholders(
+ "Hello world",
+ items,
+ ctx,
+ (item) => item.type,
+ (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+ simpleResolveConflicts,
+ );
+
+ expect(result).toBe("[[WORD_1]] world");
+ expect(ctx.mapping["[[WORD_1]]"]).toBe("Hello");
+ });
+
+ test("replaces multiple items", () => {
+ const ctx = createPlaceholderContext();
+ const items: TestItem[] = [
+ { start: 0, end: 5, type: "WORD" },
+ { start: 6, end: 11, type: "WORD" },
+ ];
+
+ const result = replaceWithPlaceholders(
+ "Hello world",
+ items,
+ ctx,
+ (item) => item.type,
+ (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+ simpleResolveConflicts,
+ );
+
+ expect(result).toBe("[[WORD_1]] [[WORD_2]]");
+ });
+
+ test("reuses placeholder for duplicate values", () => {
+ const ctx = createPlaceholderContext();
+ // Both spans cover the text "foo", so they must share one placeholder.
+ const items: TestItem[] = [
+ { start: 0, end: 3, type: "WORD" },
+ { start: 8, end: 11, type: "WORD" },
+ ];
+
+ const result = replaceWithPlaceholders(
+ "foo bar foo",
+ items,
+ ctx,
+ (item) => item.type,
+ (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+ simpleResolveConflicts,
+ );
+
+ expect(result).toBe("[[WORD_1]] bar [[WORD_1]]");
+ expect(Object.keys(ctx.mapping)).toHaveLength(1);
+ });
+
+ test("preserves context across calls", () => {
+ const ctx = createPlaceholderContext();
+
+ // First call consumes WORD_1; the second call on the same context must continue at WORD_2.
+ replaceWithPlaceholders(
+ "Hello",
+ [{ start: 0, end: 5, type: "WORD" }],
+ ctx,
+ (item: TestItem) => item.type,
+ (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+ simpleResolveConflicts,
+ );
+
+ const result = replaceWithPlaceholders(
+ "World",
+ [{ start: 0, end: 5, type: "WORD" }],
+ ctx,
+ (item: TestItem) => item.type,
+ (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+ simpleResolveConflicts,
+ );
+
+ expect(result).toBe("[[WORD_2]]");
+ expect(ctx.mapping["[[WORD_1]]"]).toBe("Hello");
+ expect(ctx.mapping["[[WORD_2]]"]).toBe("World");
+ });
+
+ test("handles adjacent items", () => {
+ const ctx = createPlaceholderContext();
+ // The spans touch at index 2 (end of A equals start of B) but do not overlap.
+ const items: TestItem[] = [
+ { start: 0, end: 2, type: "A" },
+ { start: 2, end: 4, type: "B" },
+ ];
+
+ const result = replaceWithPlaceholders(
+ "AABB",
+ items,
+ ctx,
+ (item) => item.type,
+ (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+ simpleResolveConflicts,
+ );
+
+ expect(result).toBe("[[A_1]][[B_1]]");
+ });
+});
+
+// Suite: restoring placeholders from a context whose mapping is filled in by hand.
+describe("restorePlaceholders", () => {
+ test("returns original text when no mappings", () => {
+ const ctx = createPlaceholderContext();
+ expect(restorePlaceholders("Hello world", ctx)).toBe("Hello world");
+ });
+
+ test("restores single placeholder", () => {
+ const ctx = createPlaceholderContext();
+ ctx.mapping["[[WORD_1]]"] = "Hello";
+
+ expect(restorePlaceholders("[[WORD_1]] world", ctx)).toBe("Hello world");
+ });
+
+ test("restores multiple placeholders", () => {
+ const ctx = createPlaceholderContext();
+ ctx.mapping["[[A_1]]"] = "Hello";
+ ctx.mapping["[[B_1]]"] = "World";
+
+ expect(restorePlaceholders("[[A_1]] [[B_1]]", ctx)).toBe("Hello World");
+ });
+
+ test("restores repeated placeholders", () => {
+ const ctx = createPlaceholderContext();
+ ctx.mapping["[[X_1]]"] = "test";
+
+ expect(restorePlaceholders("[[X_1]] and [[X_1]]", ctx)).toBe("test and test");
+ });
+
+ test("applies formatValue function", () => {
+ const ctx = createPlaceholderContext();
+ ctx.mapping["[[X_1]]"] = "secret";
+
+ // formatValue receives the original value and its return value is what gets inserted.
+ const result = restorePlaceholders("Value: [[X_1]]", ctx, (v) => `[REDACTED:${v}]`);
+ expect(result).toBe("Value: [REDACTED:secret]");
+ });
+
+ test("leaves unknown placeholders unchanged", () => {
+ const ctx = createPlaceholderContext();
+ ctx.mapping["[[X_1]]"] = "known";
+
+ expect(restorePlaceholders("[[X_1]] [[Y_1]]", ctx)).toBe("known [[Y_1]]");
+ });
+});
+
+// Suite: masking followed by unmasking with the same context yields the input text.
+describe("replace -> restore roundtrip", () => {
+ test("preserves original data", () => {
+ const ctx = createPlaceholderContext();
+ const original = "Contact john@example.com or call +1234567890";
+ // Spans: [8, 24) is "john@example.com", [33, 44) is "+1234567890".
+ const items: TestItem[] = [
+ { start: 8, end: 24, type: "EMAIL" },
+ { start: 33, end: 44, type: "PHONE" },
+ ];
+
+ const replaced = replaceWithPlaceholders(
+ original,
+ items,
+ ctx,
+ (item) => item.type,
+ (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+ simpleResolveConflicts,
+ );
+
+ expect(replaced).not.toContain("john@example.com");
+ expect(replaced).not.toContain("+1234567890");
+
+ const restored = restorePlaceholders(replaced, ctx);
+ expect(restored).toBe(original);
+ });
+});
+
+// Suite: per-part transformation of string and array (multimodal) message content.
+// perPartData is indexed as perPartData[msgIdx][partIdx]; string content uses part index 0.
+describe("transformMessagesPerPart", () => {
+ test("transforms string content", () => {
+ const messages: ChatMessage[] = [{ role: "user", content: "Hello world" }];
+ const perPartData = [[[{ marker: true }]]];
+
+ const result = transformMessagesPerPart(
+ messages,
+ perPartData,
+ (text, data) => (data.length > 0 ? text.toUpperCase() : text),
+ {},
+ );
+
+ expect(result[0].content).toBe("HELLO WORLD");
+ });
+
+ test("skips messages without data", () => {
+ const messages: ChatMessage[] = [
+ { role: "user", content: "Keep this" },
+ { role: "assistant", content: "And this" },
+ ];
+ const perPartData = [[[]], [[]]];
+
+ // The transform would upper-case unconditionally, so unchanged output shows it was not called.
+ const result = transformMessagesPerPart(
+ messages,
+ perPartData,
+ (text) => text.toUpperCase(),
+ {},
+ );
+
+ expect(result[0].content).toBe("Keep this");
+ expect(result[1].content).toBe("And this");
+ });
+
+ test("transforms array content (multimodal)", () => {
+ const messages: ChatMessage[] = [
+ {
+ role: "user",
+ content: [
+ { type: "text", text: "Hello" },
+ { type: "image_url", image_url: { url: "https://example.com/img.jpg" } },
+ ],
+ },
+ ];
+ // Data only for part 0 (the text part); the image part has none.
+ const perPartData = [[[{ marker: true }], []]];
+
+ const result = transformMessagesPerPart(
+ messages,
+ perPartData,
+ (text, data) => (data.length > 0 ? text.toUpperCase() : text),
+ {},
+ );
+
+ const content = result[0].content as Array<{ type: string; text?: string }>;
+ expect(content[0].text).toBe("HELLO");
+ expect(content[1].type).toBe("image_url");
+ });
+
+ test("preserves message roles", () => {
+ const messages: ChatMessage[] = [
+ { role: "system", content: "sys" },
+ { role: "user", content: "usr" },
+ { role: "assistant", content: "ast" },
+ ];
+ const perPartData = [[[]], [[]], [[]]];
+
+ const result = transformMessagesPerPart(messages, perPartData, (t) => t, {});
+
+ expect(result[0].role).toBe("system");
+ expect(result[1].role).toBe("user");
+ expect(result[2].role).toBe("assistant");
+ });
+
+ test("passes context to transform function", () => {
+ const messages: ChatMessage[] = [{ role: "user", content: "test" }];
+ const perPartData = [[[{ id: 1 }]]];
+ const ctx = { prefix: ">> " };
+
+ const result = transformMessagesPerPart(
+ messages,
+ perPartData,
+ (text, _data, context: { prefix: string }) => context.prefix + text,
+ ctx,
+ );
+
+ expect(result[0].content).toBe(">> test");
+ });
+});
+
+// Suite: restoring placeholders inside response.choices[].message.content
+// while leaving every other field of the response as it was.
+describe("restoreResponsePlaceholders", () => {
+ test("restores placeholders in response choices", () => {
+ const ctx = createPlaceholderContext();
+ ctx.mapping["[[X_1]]"] = "secret";
+
+ const response = {
+ id: "test",
+ choices: [{ message: { content: "Value: [[X_1]]" } }],
+ };
+
+ const result = restoreResponsePlaceholders(response, ctx);
+ expect(result.choices[0].message.content).toBe("Value: secret");
+ });
+
+ test("handles multiple choices", () => {
+ const ctx = createPlaceholderContext();
+ ctx.mapping["[[X_1]]"] = "val";
+
+ const response = {
+ id: "test",
+ choices: [{ message: { content: "A: [[X_1]]" } }, { message: { content: "B: [[X_1]]" } }],
+ };
+
+ const result = restoreResponsePlaceholders(response, ctx);
+ expect(result.choices[0].message.content).toBe("A: val");
+ expect(result.choices[1].message.content).toBe("B: val");
+ });
+
+ test("preserves response structure", () => {
+ const ctx = createPlaceholderContext();
+ const response = {
+ id: "resp-123",
+ model: "test-model",
+ choices: [{ message: { content: "text" } }],
+ usage: { tokens: 10 },
+ };
+
+ const result = restoreResponsePlaceholders(response, ctx);
+ expect(result.id).toBe("resp-123");
+ expect(result.model).toBe("test-model");
+ expect(result.usage).toEqual({ tokens: 10 });
+ });
+
+ test("applies formatValue function", () => {
+ const ctx = createPlaceholderContext();
+ ctx.mapping["[[X_1]]"] = "secret";
+
+ const response = {
+ id: "test",
+ choices: [{ message: { content: "[[X_1]]" } }],
+ };
+
+ const result = restoreResponsePlaceholders(response, ctx, (v) => `<${v}>`);
+ expect(result.choices[0].message.content).toBe("<secret>");
+ });
+
+ test("handles non-string content", () => {
+ const ctx = createPlaceholderContext();
+ // null content must be passed through rather than treated as text.
+ const response = {
+ id: "test",
+ choices: [{ message: { content: null } }],
+ };
+
+ const result = restoreResponsePlaceholders(response, ctx);
+ expect(result.choices[0].message.content).toBe(null);
+ });
+});
+
+// Suite: streaming restore. A trailing, not yet closed "[[..." is held back in
+// remainingBuffer and prepended to the next chunk by the caller.
+describe("processStreamChunk", () => {
+ test("processes complete text without placeholders", () => {
+ const ctx = createPlaceholderContext();
+ const restore = (text: string) => text;
+
+ const { output, remainingBuffer } = processStreamChunk("", "Hello world", ctx, restore);
+
+ expect(output).toBe("Hello world");
+ expect(remainingBuffer).toBe("");
+ });
+
+ test("processes complete placeholder", () => {
+ const ctx = createPlaceholderContext();
+ ctx.mapping["[[X_1]]"] = "secret";
+
+ const { output, remainingBuffer } = processStreamChunk(
+ "",
+ "Value: [[X_1]]!",
+ ctx,
+ restorePlaceholders,
+ );
+
+ expect(output).toBe("Value: secret!");
+ expect(remainingBuffer).toBe("");
+ });
+
+ test("buffers partial placeholder at end", () => {
+ const ctx = createPlaceholderContext();
+
+ const { output, remainingBuffer } = processStreamChunk(
+ "",
+ "Hello [[PARTIAL",
+ ctx,
+ restorePlaceholders,
+ );
+
+ expect(output).toBe("Hello ");
+ expect(remainingBuffer).toBe("[[PARTIAL");
+ });
+
+ test("completes buffered placeholder", () => {
+ const ctx = createPlaceholderContext();
+ ctx.mapping["[[X_1]]"] = "done";
+
+ // The buffer from a previous call plus the new chunk form the full "[[X_1]]".
+ const { output, remainingBuffer } = processStreamChunk(
+ "[[X_",
+ "1]] end",
+ ctx,
+ restorePlaceholders,
+ );
+
+ expect(output).toBe("done end");
+ expect(remainingBuffer).toBe("");
+ });
+
+ test("handles multiple chunks with partial placeholders", () => {
+ const ctx = createPlaceholderContext();
+ ctx.mapping["[[LONG_PLACEHOLDER_1]]"] = "value";
+
+ // First chunk
+ const r1 = processStreamChunk("", "Start [[LONG_", ctx, restorePlaceholders);
+ expect(r1.output).toBe("Start ");
+ expect(r1.remainingBuffer).toBe("[[LONG_");
+
+ // Second chunk
+ const r2 = processStreamChunk(r1.remainingBuffer, "PLACEHOLDER_", ctx, restorePlaceholders);
+ expect(r2.output).toBe("");
+ expect(r2.remainingBuffer).toBe("[[LONG_PLACEHOLDER_");
+
+ // Third chunk completes it
+ const r3 = processStreamChunk(r2.remainingBuffer, "1]] end", ctx, restorePlaceholders);
+ expect(r3.output).toBe("value end");
+ expect(r3.remainingBuffer).toBe("");
+ });
+});
+
+// Suite: end-of-stream flush of whatever processStreamChunk held back.
+describe("flushBuffer", () => {
+ test("returns empty string for empty buffer", () => {
+ const ctx = createPlaceholderContext();
+ expect(flushBuffer("", ctx, restorePlaceholders)).toBe("");
+ });
+
+ test("flushes incomplete placeholder as-is", () => {
+ const ctx = createPlaceholderContext();
+ // Nothing in the mapping matches, so the dangling text is emitted unchanged.
+ expect(flushBuffer("[[INCOMPLETE", ctx, restorePlaceholders)).toBe("[[INCOMPLETE");
+ });
+
+ test("restores complete placeholder in buffer", () => {
+ const ctx = createPlaceholderContext();
+ ctx.mapping["[[X_1]]"] = "final";
+
+ expect(flushBuffer("[[X_1]]", ctx, restorePlaceholders)).toBe("final");
+ });
+});
+
+// Suite: non-ASCII input, empty input and placeholder-shaped text with no mapping entry.
+describe("edge cases", () => {
+ test("handles unicode text", () => {
+ const ctx = createPlaceholderContext();
+ const items: TestItem[] = [{ start: 0, end: 11, type: "NAME" }];
+
+ const result = replaceWithPlaceholders(
+ "François Müller",
+ items,
+ ctx,
+ (item) => item.type,
+ (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+ simpleResolveConflicts,
+ );
+
+ // Note: JS string indices are UTF-16 code units
+ // The span [0, 11) therefore ends after "ü", cutting the surname short.
+ expect(ctx.mapping["[[NAME_1]]"]).toBe("François Mü");
+
+ const restored = restorePlaceholders(result, ctx);
+ expect(restored).toContain("François Mü");
+ });
+
+ test("handles empty text", () => {
+ const ctx = createPlaceholderContext();
+ const result = replaceWithPlaceholders(
+ "",
+ [],
+ ctx,
+ (item: TestItem) => item.type,
+ (type, ctx) => incrementAndGenerate(type, ctx, testPlaceholder),
+ simpleResolveConflicts,
+ );
+ expect(result).toBe("");
+ });
+
+ test("handles placeholder-like text that is not in mapping", () => {
+ const ctx = createPlaceholderContext();
+ ctx.mapping["[[A_1]]"] = "known";
+
+ // "[[B_1]]" has the placeholder shape but no mapping entry, so it stays as typed.
+ const result = restorePlaceholders("[[A_1]] and [[B_1]]", ctx);
+ expect(result).toBe("known and [[B_1]]");
+ });
+});
--- /dev/null
+/**
+ * Generic utilities for per-part message transformations
+ *
+ * Both PII masking and secrets masking need to:
+ * 1. Iterate over messages and their content parts
+ * 2. Apply transformations based on per-part detection data
+ * 3. Handle string vs array content uniformly
+ *
+ * This module provides shared infrastructure to avoid duplication.
+ */
+
+import type { ChatMessage } from "../services/llm-client";
+import type { Span } from "./conflict-resolver";
+import type { ContentPart } from "./content";
+import { findPartialPlaceholderStart } from "./placeholders";
+
+/**
+ * Generic context for placeholder-based transformations
+ * Used by both PII masking and secrets masking
+ *
+ * The same context object is passed to masking (which fills it in) and to
+ * unmasking (which reads mapping to put the original values back).
+ */
+export interface PlaceholderContext {
+ /** Maps placeholder -> original value (read when restoring) */
+ mapping: Record<string, string>;
+ /** Maps original value -> placeholder, so a repeated value reuses its placeholder */
+ reverseMapping: Record<string, string>;
+ /** Last number handed out per type; the next placeholder of that type gets this value plus one */
+ counters: Record<string, number>;
+}
+
+/**
+ * Result of masking text with placeholders
+ * Used by both PII masking and secrets masking
+ *
+ * Pairs the masked text with the context needed to reverse the masking later.
+ */
+export interface MaskResult {
+ /** Text with sensitive data replaced by placeholders */
+ masked: string;
+ /** Context for unmasking (maps placeholders to original values) */
+ context: PlaceholderContext;
+}
+
+/**
+ * Builds a fresh PlaceholderContext with no mappings and no counters.
+ *
+ * @returns A context whose mapping, reverseMapping and counters are new empty objects
+ */
+export function createPlaceholderContext(): PlaceholderContext {
+  const emptyContext: PlaceholderContext = { mapping: {}, reverseMapping: {}, counters: {} };
+  return emptyContext;
+}
+
+/**
+ * Increments counter for type and generates placeholder using format function
+ *
+ * Shared counter logic for both PII masking and secrets masking.
+ * Mutates context.counters: the stored value for `type` becomes the number
+ * that was just handed out.
+ *
+ * @param type - Type label whose counter is advanced
+ * @param context - Placeholder context holding the per-type counters
+ * @param format - Builds the placeholder string from the type and its new count
+ * @returns The placeholder produced by `format` for the new count (first count is 1)
+ */
+export function incrementAndGenerate(
+  type: string,
+  context: PlaceholderContext,
+  format: (type: string, count: number) => string,
+): string {
+  // Read the counter only if it is stored on the object itself. A plain
+  // `counters[type]` lookup would return inherited Object.prototype members for
+  // types named e.g. "constructor" or "toString" and corrupt the count.
+  const hasOwnCounter = Object.prototype.hasOwnProperty.call(context.counters, type);
+  const previous = hasOwnCounter ? context.counters[type] : undefined;
+
+  const count = (previous || 0) + 1;
+  context.counters[type] = count;
+  return format(type, count);
+}
+
+/**
+ * Applies a text transform to chat messages, driven by per-part data.
+ *
+ * For each message the matching entry of perPartData is looked up by index:
+ * - string content uses the data at part index 0 and is returned untouched
+ *   (same object) when that data is missing or empty;
+ * - array content is mapped part by part; only parts with type "text", a string
+ *   `text` and non-empty data are transformed, all other parts are kept as-is;
+ * - any other content (e.g. null/undefined) returns the message unchanged.
+ *
+ * Input messages and parts are never mutated; changed ones are shallow copies.
+ *
+ * @param messages - Chat messages to transform
+ * @param perPartData - Per-message, per-part data: data[msgIdx][partIdx]
+ * @param transform - Function to transform text using the part data
+ * @param context - Shared context passed to all transform calls
+ * @returns A new array with one message per input message, in the same order
+ */
+export function transformMessagesPerPart<TData, TContext>(
+  messages: ChatMessage[],
+  perPartData: TData[][][],
+  transform: (text: string, data: TData[], context: TContext) => string,
+  context: TContext,
+): ChatMessage[] {
+  const transformMessage = (msg: ChatMessage, msgIdx: number): ChatMessage => {
+    const dataByPart = perPartData[msgIdx] || [];
+
+    // Plain string content: its data lives at part index 0.
+    if (typeof msg.content === "string") {
+      const stringData = dataByPart[0] || [];
+      return stringData.length === 0
+        ? msg
+        : { ...msg, content: transform(msg.content, stringData, context) };
+    }
+
+    // Neither string nor array (null/undefined content): nothing to transform.
+    if (!Array.isArray(msg.content)) {
+      return msg;
+    }
+
+    // Multimodal content: data is looked up per part.
+    const rewrittenParts = msg.content.map((part: ContentPart, partIdx: number) => {
+      const partData = dataByPart[partIdx] || [];
+      if (part.type !== "text" || typeof part.text !== "string" || partData.length === 0) {
+        return part;
+      }
+      return { ...part, text: transform(part.text, partData, context) };
+    });
+    return { ...msg, content: rewrittenParts };
+  };
+
+  return messages.map(transformMessage);
+}
+
+/**
+ * Restores placeholders in text with original values
+ *
+ * Generic function used by both PII unmasking and secrets unmasking.
+ *
+ * The text is scanned once, so a restored value is never re-examined: if an
+ * original value happens to contain text that looks like another placeholder,
+ * that text is left exactly as it was. Placeholder-shaped text without a
+ * mapping entry is also left unchanged.
+ *
+ * @param text - Text containing placeholders
+ * @param context - Context with placeholder mappings
+ * @param formatValue - Optional function to format restored values (e.g., add markers);
+ *   called once per mapped placeholder
+ * @returns The text with every mapped placeholder replaced by its (formatted) original value
+ */
+export function restorePlaceholders(
+  text: string,
+  context: PlaceholderContext,
+  formatValue?: (original: string) => string,
+): string {
+  // Longest first, so that if one placeholder is a prefix of another the longer one wins.
+  // Empty keys are dropped: an empty alternative would match at every position.
+  const placeholders = Object.keys(context.mapping)
+    .filter((placeholder) => placeholder.length > 0)
+    .sort((a, b) => b.length - a.length);
+
+  if (placeholders.length === 0) {
+    return text;
+  }
+
+  // Compute each replacement up front, once per placeholder.
+  const replacements = new Map<string, string>();
+  for (const placeholder of placeholders) {
+    const originalValue = context.mapping[placeholder];
+    replacements.set(placeholder, formatValue ? formatValue(originalValue) : originalValue);
+  }
+
+  // Placeholders contain regex metacharacters such as "[" and "]", so escape them
+  // before joining them into one alternation.
+  const pattern = new RegExp(
+    placeholders.map((placeholder) => placeholder.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|"),
+    "g",
+  );
+
+  // A replacer function (rather than a replacement string) keeps "$" sequences
+  // inside restored values from being interpreted.
+  return text.replace(pattern, (match) => replacements.get(match) ?? match);
+}
+
+/**
+ * Restores placeholders in every choice of a chat completion response.
+ *
+ * Only string message content is run through restorePlaceholders; any other
+ * content value is carried over untouched. The response, its choices and their
+ * messages are shallow-copied, so the input object is not modified.
+ *
+ * @param response - The response object with choices
+ * @param context - Context with placeholder mappings
+ * @param formatValue - Optional function to format restored values
+ * @returns A copy of the response with restored message content
+ */
+export function restoreResponsePlaceholders<
+  T extends { choices: Array<{ message: { content: unknown } }> },
+>(response: T, context: PlaceholderContext, formatValue?: (original: string) => string): T {
+  const restoreContent = (content: unknown): unknown =>
+    typeof content === "string" ? restorePlaceholders(content, context, formatValue) : content;
+
+  const restoredChoices = response.choices.map((choice) => {
+    const message = { ...choice.message, content: restoreContent(choice.message.content) };
+    return { ...choice, message };
+  });
+
+  return { ...response, choices: restoredChoices } as T;
+}
+
+/**
+ * Replaces items in text with placeholders
+ *
+ * Generic function used by both PII masking and secrets masking.
+ * Handles: conflict resolution, placeholder assignment, and replacement.
+ *
+ * Placeholders are assigned in order of appearance in the text. A value that
+ * already has a placeholder in the context (from this call or an earlier one)
+ * reuses it. New assignments are recorded in context.mapping and
+ * context.reverseMapping, so the context is mutated.
+ *
+ * @param text - Text to process
+ * @param items - Items with start/end positions to replace
+ * @param context - Placeholder context for tracking mappings
+ * @param getType - Function to get the type string from an item
+ * @param generatePlaceholder - Function to generate placeholder for a type
+ * @param resolveConflicts - Function to resolve overlapping items
+ * @returns The text with every resolved item replaced by its placeholder
+ */
+export function replaceWithPlaceholders<T extends Span>(
+  text: string,
+  items: T[],
+  context: PlaceholderContext,
+  getType: (item: T) => string,
+  generatePlaceholder: (type: string, context: PlaceholderContext) => string,
+  resolveConflicts: (items: T[]) => T[],
+): string {
+  if (items.length === 0) {
+    return text;
+  }
+
+  // Resolve conflicts between overlapping items
+  const resolved = resolveConflicts(items);
+
+  // First pass: ascending start position, so numbering follows order of appearance
+  const inOrderOfAppearance = [...resolved].sort((a, b) => a.start - b.start);
+
+  const assignments: Array<{ item: T; placeholder: string }> = [];
+  for (const item of inOrderOfAppearance) {
+    const originalValue = text.slice(item.start, item.end);
+
+    // Reuse an existing placeholder for this exact value. The own-property check
+    // matters: a bare `reverseMapping[value]` lookup would return inherited
+    // Object.prototype members for values such as "constructor".
+    const alreadyMapped = Object.prototype.hasOwnProperty.call(
+      context.reverseMapping,
+      originalValue,
+    );
+    let placeholder = alreadyMapped ? context.reverseMapping[originalValue] : undefined;
+
+    if (!placeholder) {
+      placeholder = generatePlaceholder(getType(item), context);
+      context.mapping[placeholder] = originalValue;
+      context.reverseMapping[originalValue] = placeholder;
+    }
+
+    assignments.push({ item, placeholder });
+  }
+
+  // Second pass: replace from the last item to the first, so the start/end
+  // indices of the items still to be replaced remain valid.
+  let result = text;
+  for (const { item, placeholder } of assignments.reverse()) {
+    result = result.slice(0, item.start) + placeholder + result.slice(item.end);
+  }
+
+  return result;
+}
+
+/**
+ * Handles one chunk of a streamed response, holding back a trailing partial placeholder.
+ *
+ * The previous buffer and the new chunk are joined, and findPartialPlaceholderStart
+ * is asked where an unfinished placeholder begins. Everything before that point is
+ * restored and emitted; the rest is returned as the buffer for the next call.
+ * A result of -1 is treated as "no partial placeholder", in which case the whole
+ * text is restored and the buffer is empty.
+ *
+ * Generic function used by both PII unmasking and secrets unmasking.
+ *
+ * @param buffer - Previous buffer content
+ * @param newChunk - New chunk to process
+ * @param context - Placeholder context
+ * @param restore - Function to restore placeholders in text
+ * @returns The restored output plus the text to carry over into the next call
+ */
+export function processStreamChunk(
+  buffer: string,
+  newChunk: string,
+  context: PlaceholderContext,
+  restore: (text: string, ctx: PlaceholderContext) => string,
+): { output: string; remainingBuffer: string } {
+  const pending = buffer + newChunk;
+  const partialAt = findPartialPlaceholderStart(pending);
+
+  // With no partial placeholder the cut sits at the very end: all of it is safe to emit.
+  const cut = partialAt === -1 ? pending.length : partialAt;
+
+  return {
+    output: restore(pending.slice(0, cut), context),
+    remainingBuffer: pending.slice(cut),
+  };
+}
+
+/**
+ * Emits whatever is still buffered once the stream has ended.
+ *
+ * An empty buffer yields "" without calling `restore`; otherwise the buffer is
+ * passed through `restore` as-is.
+ *
+ * @param buffer - Remaining buffer content
+ * @param context - Placeholder context
+ * @param restore - Function to restore placeholders in text
+ * @returns The restored remainder, or "" when nothing was buffered
+ */
+export function flushBuffer(
+  buffer: string,
+  context: PlaceholderContext,
+  restore: (text: string, ctx: PlaceholderContext) => string,
+): string {
+  return buffer ? restore(buffer, context) : "";
+}
test("secret format uses correct delimiters", () => {
expect(SECRET_PLACEHOLDER_FORMAT).toContain(PLACEHOLDER_DELIMITERS.start);
expect(SECRET_PLACEHOLDER_FORMAT).toContain(PLACEHOLDER_DELIMITERS.end);
- expect(SECRET_PLACEHOLDER_FORMAT).toBe("[[SECRET_REDACTED_{N}]]");
+ expect(SECRET_PLACEHOLDER_FORMAT).toBe("[[SECRET_MASKED_{N}]]");
});
});
describe("generateSecretPlaceholder", () => {
test("generates secret placeholder", () => {
const result = generateSecretPlaceholder("API_KEY_OPENAI", 1);
- expect(result).toBe("[[SECRET_REDACTED_API_KEY_OPENAI_1]]");
+ expect(result).toBe("[[SECRET_MASKED_API_KEY_OPENAI_1]]");
});
test("generates secret placeholder with different type and count", () => {
const result = generateSecretPlaceholder("PEM_PRIVATE_KEY", 2);
- expect(result).toBe("[[SECRET_REDACTED_PEM_PRIVATE_KEY_2]]");
+ expect(result).toBe("[[SECRET_MASKED_PEM_PRIVATE_KEY_2]]");
});
});
/**
- * Placeholder constants for PII masking and secrets redaction
+ * Placeholder constants for PII masking and secrets masking
* Single source of truth for all placeholder-related logic
*/
/** PII placeholder format: [[TYPE_N]] e.g. [[PERSON_1]], [[EMAIL_ADDRESS_2]] */
export const PII_PLACEHOLDER_FORMAT = "[[{TYPE}_{N}]]";
-/** Secrets placeholder format: [[SECRET_REDACTED_TYPE_N]] e.g. [[SECRET_REDACTED_API_KEY_OPENAI_1]] */
-export const SECRET_PLACEHOLDER_FORMAT = "[[SECRET_REDACTED_{N}]]";
+/** Secrets placeholder format: [[SECRET_MASKED_TYPE_N]] e.g. [[SECRET_MASKED_API_KEY_OPENAI_1]] */
+export const SECRET_PLACEHOLDER_FORMAT = "[[SECRET_MASKED_{N}]]";
/**
* Generates a placeholder string from the format