You send: "Write a follow-up email to Dr. Sarah Chen (sarah.chen@hospital.org)
about next week's project meeting"
-LLM receives: "Write a follow-up email to <PERSON_1> (<EMAIL_ADDRESS_1>)
+LLM receives: "Write a follow-up email to [[PERSON_1]] ([[EMAIL_ADDRESS_1]])
about next week's project meeting"
-LLM responds: "Dear <PERSON_1>, Following up on our discussion..."
+LLM responds: "Dear [[PERSON_1]], Following up on our discussion..."
You receive: "Dear Dr. Sarah Chen, Following up on our discussion..."
```
# The 200KB default covers typical use cases
max_scan_chars: 200000
- # Placeholder format for redaction (only used if action: redact)
- # {N} will be replaced with type and sequential number (e.g., API_KEY_OPENAI_1)
- redact_placeholder: "<SECRET_REDACTED_{N}>"
-
# Log detected secret types (never logs secret content)
# Even if logging.log_content is true, secret content is never logged
log_detected_types: true
"language": "en",
"language_fallback": false,
"detected_language": "en",
- "masked_content": "Hello <EMAIL_ADDRESS_1>",
+ "masked_content": "Hello [[EMAIL_ADDRESS_1]]",
"secrets_detected": 0,
"secrets_types": null
}
PasteGuard finds: `Dr. Sarah Chen` (PERSON), `sarah.chen@hospital.org` (EMAIL)
</Step>
<Step title="Masked request sent">
- Provider receives: `"Write a follow-up email to <PERSON_1> (<EMAIL_ADDRESS_1>)"`
+ Provider receives: `"Write a follow-up email to [[PERSON_1]] ([[EMAIL_ADDRESS_1]])"`
</Step>
<Step title="Response masked">
- Provider responds: `"Dear <PERSON_1>, Following up on our discussion..."`
+ Provider responds: `"Dear [[PERSON_1]], Following up on our discussion..."`
</Step>
<Step title="Response unmasked">
You receive: `"Dear Dr. Sarah Chen, Following up on our discussion..."`
- Secret content is **never** logged, even if `log_content: true`
- Only secret types are logged if `log_detected_types: true`
-- Masked content shows placeholders like `<EMAIL_ADDRESS_1>`, not real PII
+- Masked content shows placeholders like `[[EMAIL_ADDRESS_1]]`, not real PII
| `entities` | Private keys | Secret types to detect |
| `max_scan_chars` | `200000` | Max characters to scan (0 = unlimited) |
| `log_detected_types` | `true` | Log detected types (never logs content) |
-| `redact_placeholder` | `<SECRET_REDACTED_{N}>` | Placeholder format for redaction |
## Actions
```
</Step>
<Step title="PasteGuard masks PII">
- Detected: `Dr. Sarah Chen` → `<PERSON_1>`, `sarah.chen@hospital.org` → `<EMAIL_ADDRESS_1>`
+ Detected: `Dr. Sarah Chen` → `[[PERSON_1]]`, `sarah.chen@hospital.org` → `[[EMAIL_ADDRESS_1]]`
</Step>
<Step title="OpenAI receives">
```
- Write a follow-up email to <PERSON_1> (<EMAIL_ADDRESS_1>)
+ Write a follow-up email to [[PERSON_1]] ([[EMAIL_ADDRESS_1]])
```
</Step>
<Step title="You get the response (unmasked)">
action: z.enum(["block", "redact", "route_local"]).default("redact"),
entities: z.array(z.enum(SecretEntityTypes)).default(["OPENSSH_PRIVATE_KEY", "PEM_PRIVATE_KEY"]),
max_scan_chars: z.coerce.number().int().min(0).default(200000),
- redact_placeholder: z.string().default("<SECRET_REDACTED_{N}>"),
log_detected_types: z.boolean().default(true),
});
--- /dev/null
+import { describe, expect, test } from "bun:test";
+import {
+ findPartialPlaceholderStart,
+ generatePlaceholder,
+ generateSecretPlaceholder,
+ PII_PLACEHOLDER_FORMAT,
+ PLACEHOLDER_DELIMITERS,
+ SECRET_PLACEHOLDER_FORMAT,
+} from "./placeholders";
+
+describe("placeholder constants", () => {
+  // Read the delimiter pair once; the cases below compare the formats against it.
+  const { start: opening, end: closing } = PLACEHOLDER_DELIMITERS;
+
+  test("delimiters are correct", () => {
+    expect(opening).toBe("[[");
+    expect(closing).toBe("]]");
+  });
+
+  test("PII format uses correct delimiters", () => {
+    for (const delimiter of [opening, closing]) {
+      expect(PII_PLACEHOLDER_FORMAT).toContain(delimiter);
+    }
+    expect(PII_PLACEHOLDER_FORMAT).toBe("[[{TYPE}_{N}]]");
+  });
+
+  test("secret format uses correct delimiters", () => {
+    for (const delimiter of [opening, closing]) {
+      expect(SECRET_PLACEHOLDER_FORMAT).toContain(delimiter);
+    }
+    expect(SECRET_PLACEHOLDER_FORMAT).toBe("[[SECRET_REDACTED_{N}]]");
+  });
+});
+
+describe("generatePlaceholder", () => {
+  // Both cases fill the shared PII format with a type and a sequence number.
+  test("generates PII placeholder", () => {
+    expect(generatePlaceholder(PII_PLACEHOLDER_FORMAT, "PERSON", 1)).toBe("[[PERSON_1]]");
+  });
+
+  test("generates placeholder with different type and count", () => {
+    const placeholder = generatePlaceholder(PII_PLACEHOLDER_FORMAT, "EMAIL_ADDRESS", 3);
+    expect(placeholder).toBe("[[EMAIL_ADDRESS_3]]");
+  });
+});
+
+describe("generateSecretPlaceholder", () => {
+  // The secret type and the counter both end up inside the placeholder text.
+  test("generates secret placeholder", () => {
+    expect(generateSecretPlaceholder("API_KEY_OPENAI", 1)).toBe(
+      "[[SECRET_REDACTED_API_KEY_OPENAI_1]]",
+    );
+  });
+
+  test("generates secret placeholder with different type and count", () => {
+    const placeholder = generateSecretPlaceholder("PEM_PRIVATE_KEY", 2);
+    expect(placeholder).toBe("[[SECRET_REDACTED_PEM_PRIVATE_KEY_2]]");
+  });
+});
+
+describe("findPartialPlaceholderStart", () => {
+  test("returns -1 for empty string", () => {
+    expect(findPartialPlaceholderStart("")).toBe(-1);
+  });
+
+  test("returns -1 when no placeholder pattern", () => {
+    expect(findPartialPlaceholderStart("Hello world")).toBe(-1);
+  });
+
+  test("returns -1 when placeholder is complete", () => {
+    expect(findPartialPlaceholderStart("Hello [[PERSON_1]] world")).toBe(-1);
+  });
+
+  test("returns -1 when multiple complete placeholders", () => {
+    expect(findPartialPlaceholderStart("[[PERSON_1]] and [[EMAIL_1]]")).toBe(-1);
+  });
+
+  test("returns position of partial placeholder at end", () => {
+    const text = "Hello [[PERSON";
+    expect(findPartialPlaceholderStart(text)).toBe(6);
+  });
+
+  test("returns position of partial placeholder with complete one before", () => {
+    const text = "[[PERSON_1]] Hello [[EMAIL";
+    expect(findPartialPlaceholderStart(text)).toBe(19);
+  });
+
+  test("handles just opening delimiter", () => {
+    const text = "Hello [[";
+    expect(findPartialPlaceholderStart(text)).toBe(6);
+  });
+
+  test("handles text ending with single bracket", () => {
+    // A trailing [ may be the first half of [[ split across two stream chunks,
+    // so it has to be held back until the next chunk shows what follows
+    expect(findPartialPlaceholderStart("Hello [")).toBe(6);
+  });
+
+  test("handles trailing single bracket after a complete placeholder", () => {
+    expect(findPartialPlaceholderStart("[[PERSON_1]] and [")).toBe(17);
+  });
+
+  test("ignores single bracket that is not at the end", () => {
+    expect(findPartialPlaceholderStart("items[0] done")).toBe(-1);
+  });
+});
--- /dev/null
+/**
+ * Placeholder constants for PII masking and secrets redaction
+ * Single source of truth for all placeholder-related logic
+ */
+
+export const PLACEHOLDER_DELIMITERS = {
+  start: "[[",
+  end: "]]",
+} as const;
+
+/** PII placeholder format: [[TYPE_N]] e.g. [[PERSON_1]], [[EMAIL_ADDRESS_2]] */
+export const PII_PLACEHOLDER_FORMAT = "[[{TYPE}_{N}]]";
+
+/**
+ * Secrets placeholder format. generateSecretPlaceholder fills {N} with TYPE_COUNT,
+ * producing [[SECRET_REDACTED_TYPE_N]] e.g. [[SECRET_REDACTED_API_KEY_OPENAI_1]]
+ */
+export const SECRET_PLACEHOLDER_FORMAT = "[[SECRET_REDACTED_{N}]]";
+
+/**
+ * Generates a placeholder string from the format
+ *
+ * @param format - Template containing the {TYPE} and {N} tokens (the first occurrence of each is replaced)
+ * @param type - Entity type substituted for {TYPE}
+ * @param count - Sequence number substituted for {N}
+ * @returns The format with both tokens filled in
+ */
+export function generatePlaceholder(format: string, type: string, count: number): string {
+  // Replacer functions insert the value verbatim; a plain string replacement
+  // would expand patterns such as $& or $$ if they ever appeared in `type`
+  return format.replace("{TYPE}", () => type).replace("{N}", () => String(count));
+}
+
+/**
+ * Generates a secret placeholder string
+ * {N} is replaced with TYPE_COUNT e.g. API_KEY_OPENAI_1
+ *
+ * @param type - Secret type embedded in the placeholder
+ * @param count - Sequence number for this secret type
+ * @returns SECRET_PLACEHOLDER_FORMAT with {N} filled in
+ */
+export function generateSecretPlaceholder(type: string, count: number): string {
+  // Replacer function for the same reason as in generatePlaceholder: no $-pattern expansion
+  return SECRET_PLACEHOLDER_FORMAT.replace("{N}", () => `${type}_${count}`);
+}
+
+/**
+ * Streaming buffer helper - finds safe position to process text
+ * that may contain partial placeholders
+ *
+ * Two situations count as a partial placeholder:
+ * - the last opening delimiter has no closing delimiter after it
+ * - the text ends with a proper prefix of the opening delimiter (a lone "["),
+ *   which can happen when a chunk boundary falls inside the delimiter itself
+ *
+ * @param text - Buffered text received so far
+ * @returns The position where it's safe to split, or -1 if entire string is safe
+ */
+export function findPartialPlaceholderStart(text: string): number {
+  const { start, end } = PLACEHOLDER_DELIMITERS;
+  const placeholderStart = text.lastIndexOf(start);
+
+  // An opening delimiter without a closing one after it is still being streamed
+  if (placeholderStart !== -1 && !text.includes(end, placeholderStart)) {
+    return placeholderStart; // Return position where partial placeholder starts
+  }
+
+  // The opening delimiter is longer than one character, so the text may end
+  // in the middle of it. Hold back the longest trailing prefix of the delimiter.
+  for (let length = start.length - 1; length > 0; length--) {
+    if (text.endsWith(start.slice(0, length))) {
+      return text.length - length;
+    }
+  }
+
+  return -1; // No partial placeholder, entire string is safe
+}
import { HTTPException } from "hono/http-exception";
import { proxy } from "hono/proxy";
import { z } from "zod";
-import { getConfig, type MaskingConfig, type SecretsDetectionConfig } from "../config";
+import { getConfig, type MaskingConfig } from "../config";
import {
detectSecrets,
extractTextFromRequest,
// Redact action - replace secrets with placeholders and continue
if (config.secrets_detection.action === "redact") {
- const redactedMessages = redactMessagesWithSecrets(
- body.messages,
- secretsResult,
- config.secrets_detection,
- );
+ const redactedMessages = redactMessagesWithSecrets(body.messages, secretsResult);
body = { ...body, messages: redactedMessages.messages };
redactionContext = redactedMessages.context;
secretsRedacted = true;
function redactMessagesWithSecrets(
messages: ChatMessage[],
secretsResult: SecretsDetectionResult,
- config: SecretsDetectionConfig,
): { messages: ChatMessage[]; context: RedactionContext } {
// Build a map of message content to redactions
// Since we concatenated all messages with \n, we need to track positions per message
const { redacted, context: updatedContext } = redactSecrets(
part.text,
partRedactions,
- config,
context,
);
context = updatedContext;
const { redacted, context: updatedContext } = redactSecrets(
msg.content,
messageRedactions,
- config,
context,
);
context = updatedContext;
action: "block",
entities: ["OPENSSH_PRIVATE_KEY", "PEM_PRIVATE_KEY"],
max_scan_chars: 200000,
- redact_placeholder: "<SECRET_REDACTED_{N}>",
log_detected_types: true,
};
import { describe, expect, test } from "bun:test";
-import type { SecretsDetectionConfig } from "../config";
import type { ChatMessage } from "../services/llm-client";
import { maskMessages } from "../services/masking";
import type { PIIEntity } from "../services/pii-detector";
import type { ContentPart } from "../utils/content";
describe("Multimodal content handling", () => {
- const _secretsConfig: SecretsDetectionConfig = {
- enabled: true,
- action: "redact",
- entities: ["API_KEY_OPENAI"],
- max_scan_chars: 200000,
- redact_placeholder: "<SECRET_REDACTED_{N}>",
- log_detected_types: true,
- };
-
- describe("Secrets redaction with offset tracking", () => {
- // Note: Secrets are not expected to span across newlines in real scenarios
- // The offset tracking is implemented to handle PII entities correctly
- });
-
describe("PII masking with offset tracking", () => {
test("masks PII in multimodal array content", () => {
const messages: ChatMessage[] = [
// Part 0 should have email masked
expect(maskedContent[0].type).toBe("text");
- expect(maskedContent[0].text).toBe("My email is <EMAIL_ADDRESS_1> and");
+ expect(maskedContent[0].text).toBe("My email is [[EMAIL_ADDRESS_1]] and");
expect(maskedContent[0].text).not.toContain("john@example.com");
// Part 1 should be unchanged (image)
// Part 2 should have phone masked
expect(maskedContent[2].type).toBe("text");
- expect(maskedContent[2].text).toBe("my phone is <PHONE_NUMBER_1>");
+ expect(maskedContent[2].text).toBe("my phone is [[PHONE_NUMBER_1]]");
expect(maskedContent[2].text).not.toContain("555-1234");
});
// Verify the text is actually masked (not the original)
expect(maskedContent[0].text).not.toContain("Alice");
expect(maskedContent[0].text).not.toContain("alice@secret.com");
- expect(maskedContent[0].text).toContain("<PERSON_1>");
- expect(maskedContent[0].text).toContain("<EMAIL_ADDRESS_1>");
+ expect(maskedContent[0].text).toContain("[[PERSON_1]]");
+ expect(maskedContent[0].text).toContain("[[EMAIL_ADDRESS_1]]");
});
test("handles entities spanning multiple parts with proper offsets", () => {
const maskedContent = masked[0].content as ContentPart[];
// Both parts should be affected by the email entity
- // Part 0: "First part with <EMAIL" or similar
- // Part 1: "ADDRESS_1> in two parts" or similar
+ // Part 0: "First part with [[EMAIL" or similar
+ // Part 1: "ADDRESS_1]] in two parts" or similar
// The exact split depends on how the masking handles cross-boundary entities
// At minimum, verify that the entity is masked somewhere
.map((p) => p.text)
.join("\n");
- expect(fullMasked).toContain("<EMAIL_ADDRESS_");
+ expect(fullMasked).toContain("[[EMAIL_ADDRESS_");
expect(fullMasked).not.toContain("email@example.com");
});
});
import { describe, expect, test } from "bun:test";
-import type { SecretsDetectionConfig } from "../config";
import type { SecretsRedaction } from "./detect";
import {
createRedactionContext,
unredactStreamChunk,
} from "./redact";
-const defaultConfig: SecretsDetectionConfig = {
- enabled: true,
- action: "redact",
- entities: ["OPENSSH_PRIVATE_KEY", "PEM_PRIVATE_KEY", "API_KEY_OPENAI"],
- max_scan_chars: 200000,
- redact_placeholder: "<SECRET_REDACTED_{N}>",
- log_detected_types: true,
-};
-
const sampleSecret = "sk-proj-abc123def456ghi789jkl012mno345pqr678stu901vwx";
describe("redactSecrets", () => {
test("returns original text when no redactions", () => {
const text = "Hello world";
- const result = redactSecrets(text, [], defaultConfig);
+ const result = redactSecrets(text, []);
expect(result.redacted).toBe("Hello world");
expect(Object.keys(result.context.mapping)).toHaveLength(0);
});
const redactions: SecretsRedaction[] = [
{ start: 14, end: 14 + sampleSecret.length, type: "API_KEY_OPENAI" },
];
- const result = redactSecrets(text, redactions, defaultConfig);
+ const result = redactSecrets(text, redactions);
- expect(result.redacted).toBe("My API key is <SECRET_REDACTED_API_KEY_OPENAI_1>");
- expect(result.context.mapping["<SECRET_REDACTED_API_KEY_OPENAI_1>"]).toBe(sampleSecret);
+ expect(result.redacted).toBe("My API key is [[SECRET_REDACTED_API_KEY_OPENAI_1]]");
+ expect(result.context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"]).toBe(sampleSecret);
});
test("redacts multiple secrets of same type", () => {
type: "API_KEY_OPENAI",
},
];
- const result = redactSecrets(text, redactions, defaultConfig);
+ const result = redactSecrets(text, redactions);
// Same secret value should get same placeholder
expect(result.redacted).toBe(
- "Key1: <SECRET_REDACTED_API_KEY_OPENAI_1> Key2: <SECRET_REDACTED_API_KEY_OPENAI_1>",
+ "Key1: [[SECRET_REDACTED_API_KEY_OPENAI_1]] Key2: [[SECRET_REDACTED_API_KEY_OPENAI_1]]",
);
expect(Object.keys(result.context.mapping)).toHaveLength(1);
});
type: "API_KEY_AWS",
},
];
- const result = redactSecrets(text, redactions, defaultConfig);
+ const result = redactSecrets(text, redactions);
- expect(result.redacted).toContain("<SECRET_REDACTED_API_KEY_OPENAI_1>");
- expect(result.redacted).toContain("<SECRET_REDACTED_API_KEY_AWS_1>");
+ expect(result.redacted).toContain("[[SECRET_REDACTED_API_KEY_OPENAI_1]]");
+ expect(result.redacted).toContain("[[SECRET_REDACTED_API_KEY_AWS_1]]");
expect(Object.keys(result.context.mapping)).toHaveLength(2);
});
const redactions1: SecretsRedaction[] = [
{ start: 5, end: 5 + sampleSecret.length, type: "API_KEY_OPENAI" },
];
- redactSecrets(text1, redactions1, defaultConfig, context);
+ redactSecrets(text1, redactions1, context);
const anotherSecret = "sk-proj-xyz789abc123def456ghi789jkl012mno345pqr678";
const text2 = `Another: ${anotherSecret}`;
const redactions2: SecretsRedaction[] = [
{ start: 9, end: 9 + anotherSecret.length, type: "API_KEY_OPENAI" },
];
- const result2 = redactSecrets(text2, redactions2, defaultConfig, context);
+ const result2 = redactSecrets(text2, redactions2, context);
// Second secret should get incremented counter
- expect(result2.redacted).toBe("Another: <SECRET_REDACTED_API_KEY_OPENAI_2>");
+ expect(result2.redacted).toBe("Another: [[SECRET_REDACTED_API_KEY_OPENAI_2]]");
expect(Object.keys(context.mapping)).toHaveLength(2);
});
-
- test("handles custom placeholder format", () => {
- const customConfig: SecretsDetectionConfig = {
- ...defaultConfig,
- redact_placeholder: "[REDACTED:{N}]",
- };
- const text = `Key: ${sampleSecret}`;
- const redactions: SecretsRedaction[] = [
- { start: 5, end: 5 + sampleSecret.length, type: "API_KEY_OPENAI" },
- ];
- const result = redactSecrets(text, redactions, customConfig);
-
- expect(result.redacted).toBe("Key: [REDACTED:API_KEY_OPENAI_1]");
- });
});
describe("unredactSecrets", () => {
test("restores single secret", () => {
const context = createRedactionContext();
- context.mapping["<SECRET_REDACTED_API_KEY_OPENAI_1>"] = sampleSecret;
+ context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
- const text = "My API key is <SECRET_REDACTED_API_KEY_OPENAI_1>";
+ const text = "My API key is [[SECRET_REDACTED_API_KEY_OPENAI_1]]";
const result = unredactSecrets(text, context);
expect(result).toBe(`My API key is ${sampleSecret}`);
test("restores multiple secrets", () => {
const context = createRedactionContext();
const awsKey = "AKIAIOSFODNN7EXAMPLE";
- context.mapping["<SECRET_REDACTED_API_KEY_OPENAI_1>"] = sampleSecret;
- context.mapping["<SECRET_REDACTED_API_KEY_AWS_1>"] = awsKey;
+ context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
+ context.mapping["[[SECRET_REDACTED_API_KEY_AWS_1]]"] = awsKey;
- const text = "OpenAI: <SECRET_REDACTED_API_KEY_OPENAI_1> AWS: <SECRET_REDACTED_API_KEY_AWS_1>";
+ const text =
+ "OpenAI: [[SECRET_REDACTED_API_KEY_OPENAI_1]] AWS: [[SECRET_REDACTED_API_KEY_AWS_1]]";
const result = unredactSecrets(text, context);
expect(result).toBe(`OpenAI: ${sampleSecret} AWS: ${awsKey}`);
test("restores repeated placeholders", () => {
const context = createRedactionContext();
- context.mapping["<SECRET_REDACTED_API_KEY_OPENAI_1>"] = sampleSecret;
+ context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
const text =
- "Key1: <SECRET_REDACTED_API_KEY_OPENAI_1> Key2: <SECRET_REDACTED_API_KEY_OPENAI_1>";
+ "Key1: [[SECRET_REDACTED_API_KEY_OPENAI_1]] Key2: [[SECRET_REDACTED_API_KEY_OPENAI_1]]";
const result = unredactSecrets(text, context);
expect(result).toBe(`Key1: ${sampleSecret} Key2: ${sampleSecret}`);
},
];
- const { redacted, context } = redactSecrets(originalText, redactions, defaultConfig);
+ const { redacted, context } = redactSecrets(originalText, redactions);
// Verify secret is not in redacted text
expect(redacted).not.toContain(sampleSecret);
- expect(redacted).toContain("<SECRET_REDACTED_API_KEY_OPENAI_1>");
+ expect(redacted).toContain("[[SECRET_REDACTED_API_KEY_OPENAI_1]]");
// Unredact and verify original is restored
const restored = unredactSecrets(redacted, context);
test("handles empty redactions array", () => {
const text = "No secrets here";
- const { redacted, context } = redactSecrets(text, [], defaultConfig);
+ const { redacted, context } = redactSecrets(text, []);
const restored = unredactSecrets(redacted, context);
expect(restored).toBe(text);
});
[],
];
- const { redacted, context } = redactMessagesSecrets(
- messages,
- redactionsByMessage,
- defaultConfig,
- );
+ const { redacted, context } = redactMessagesSecrets(messages, redactionsByMessage);
- expect(redacted[0].content).toContain("<SECRET_REDACTED_API_KEY_OPENAI_1>");
+ expect(redacted[0].content).toContain("[[SECRET_REDACTED_API_KEY_OPENAI_1]]");
expect(redacted[0].content).not.toContain(sampleSecret);
expect(redacted[1].content).toBe("I'll help you with that.");
expect(Object.keys(context.mapping)).toHaveLength(1);
[{ start: 5, end: 5 + sampleSecret.length, type: "API_KEY_OPENAI" }],
];
- const { redacted } = redactMessagesSecrets(messages, redactionsByMessage, defaultConfig);
+ const { redacted } = redactMessagesSecrets(messages, redactionsByMessage);
expect(redacted[0].role).toBe("system");
expect(redacted[1].role).toBe("user");
[{ start: 6, end: 6 + sampleSecret.length, type: "API_KEY_OPENAI" }],
];
- const { redacted, context } = redactMessagesSecrets(
- messages,
- redactionsByMessage,
- defaultConfig,
- );
+ const { redacted, context } = redactMessagesSecrets(messages, redactionsByMessage);
// Same secret should get same placeholder across messages
- expect(redacted[0].content).toBe("Key1: <SECRET_REDACTED_API_KEY_OPENAI_1>");
- expect(redacted[1].content).toBe("Key2: <SECRET_REDACTED_API_KEY_OPENAI_1>");
+ expect(redacted[0].content).toBe("Key1: [[SECRET_REDACTED_API_KEY_OPENAI_1]]");
+ expect(redacted[1].content).toBe("Key2: [[SECRET_REDACTED_API_KEY_OPENAI_1]]");
expect(Object.keys(context.mapping)).toHaveLength(1);
});
});
describe("streaming unredact", () => {
test("unredacts complete placeholder in chunk", () => {
const context = createRedactionContext();
- context.mapping["<SECRET_REDACTED_API_KEY_OPENAI_1>"] = sampleSecret;
+ context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
const { output, remainingBuffer } = unredactStreamChunk(
"",
- "Key: <SECRET_REDACTED_API_KEY_OPENAI_1> end",
+ "Key: [[SECRET_REDACTED_API_KEY_OPENAI_1]] end",
context,
);
test("buffers partial placeholder", () => {
const context = createRedactionContext();
- context.mapping["<SECRET_REDACTED_API_KEY_OPENAI_1>"] = sampleSecret;
+ context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
- const { output, remainingBuffer } = unredactStreamChunk("", "Key: <SECRET_RED", context);
+ const { output, remainingBuffer } = unredactStreamChunk("", "Key: [[SECRET_RED", context);
expect(output).toBe("Key: ");
- expect(remainingBuffer).toBe("<SECRET_RED");
+ expect(remainingBuffer).toBe("[[SECRET_RED");
});
test("completes buffered placeholder", () => {
const context = createRedactionContext();
- context.mapping["<SECRET_REDACTED_API_KEY_OPENAI_1>"] = sampleSecret;
+ context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
const { output, remainingBuffer } = unredactStreamChunk(
- "<SECRET_RED",
- "ACTED_API_KEY_OPENAI_1> done",
+ "[[SECRET_RED",
+ "ACTED_API_KEY_OPENAI_1]] done",
context,
);
test("flushes remaining buffer", () => {
const context = createRedactionContext();
- context.mapping["<SECRET_REDACTED_API_KEY_OPENAI_1>"] = sampleSecret;
+ context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
const result = flushRedactionBuffer("<incomplete", context);
expect(result).toBe("<incomplete");
describe("unredactResponse", () => {
test("unredacts all choices in response", () => {
const context = createRedactionContext();
- context.mapping["<SECRET_REDACTED_API_KEY_OPENAI_1>"] = sampleSecret;
+ context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
const response = {
id: "test",
index: 0,
message: {
role: "assistant" as const,
- content: "Your key is <SECRET_REDACTED_API_KEY_OPENAI_1>",
+ content: "Your key is [[SECRET_REDACTED_API_KEY_OPENAI_1]]",
},
finish_reason: "stop" as const,
},
test("handles multiple choices", () => {
const context = createRedactionContext();
- context.mapping["<SECRET_REDACTED_API_KEY_OPENAI_1>"] = sampleSecret;
+ context.mapping["[[SECRET_REDACTED_API_KEY_OPENAI_1]]"] = sampleSecret;
const response = {
id: "test",
index: 0,
message: {
role: "assistant" as const,
- content: "Choice 1: <SECRET_REDACTED_API_KEY_OPENAI_1>",
+ content: "Choice 1: [[SECRET_REDACTED_API_KEY_OPENAI_1]]",
},
finish_reason: "stop" as const,
},
index: 1,
message: {
role: "assistant" as const,
- content: "Choice 2: <SECRET_REDACTED_API_KEY_OPENAI_1>",
+ content: "Choice 2: [[SECRET_REDACTED_API_KEY_OPENAI_1]]",
},
finish_reason: "stop" as const,
},
-import type { SecretsDetectionConfig } from "../config";
+import { findPartialPlaceholderStart, generateSecretPlaceholder } from "../constants/placeholders";
import type { ChatCompletionResponse, ChatMessage } from "../services/llm-client";
import { extractTextContent } from "../utils/content";
import type { SecretsRedaction } from "./detect";
}
/**
- * Generates a placeholder for a secret type using configured format
+ * Generates a placeholder for a secret type
*
- * Format: configurable via `redact_placeholder`, default "<SECRET_REDACTED_{N}>"
- * {N} is replaced with sequential number
+ * Format: [[SECRET_REDACTED_{TYPE}_{N}]] e.g. [[SECRET_REDACTED_API_KEY_OPENAI_1]]
*/
-function generatePlaceholder(
- secretType: string,
- context: RedactionContext,
- config: SecretsDetectionConfig,
-): string {
+function generatePlaceholder(secretType: string, context: RedactionContext): string {
const count = (context.counters[secretType] || 0) + 1;
context.counters[secretType] = count;
- // Use configured placeholder format, replace {N} with count
- // Include type in the placeholder to make it unique per type
- return config.redact_placeholder.replace("{N}", `${secretType}_${count}`);
+ return generateSecretPlaceholder(secretType, count);
}
/**
*
* @param text - The text to redact secrets from
* @param redactions - Array of redaction positions (sorted by start position descending)
- * @param config - Secrets detection configuration
* @param context - Optional existing context to reuse (for multiple messages)
*/
export function redactSecrets(
text: string,
redactions: SecretsRedaction[],
- config: SecretsDetectionConfig,
context?: RedactionContext,
): RedactionResult {
const ctx = context || createRedactionContext();
let placeholder = ctx.reverseMapping[originalValue];
if (!placeholder) {
- placeholder = generatePlaceholder(redaction.type, ctx, config);
+ placeholder = generatePlaceholder(redaction.type, ctx);
ctx.mapping[placeholder] = originalValue;
ctx.reverseMapping[originalValue] = placeholder;
}
*
* @param messages - Chat messages to redact
* @param redactionsByMessage - Redactions for each message (indexed by message position)
- * @param config - Secrets detection configuration
*/
export function redactMessagesSecrets(
messages: ChatMessage[],
redactionsByMessage: SecretsRedaction[][],
- config: SecretsDetectionConfig,
): { redacted: ChatMessage[]; context: RedactionContext } {
const context = createRedactionContext();
const redacted = messages.map((msg, i) => {
const redactions = redactionsByMessage[i] || [];
const text = extractTextContent(msg.content);
- const { redacted: redactedContent } = redactSecrets(text, redactions, config, context);
+ const { redacted: redactedContent } = redactSecrets(text, redactions, context);
// If original content was a string, return redacted string
// Otherwise return original content (arrays are handled in proxy.ts)
): { output: string; remainingBuffer: string } {
const combined = buffer + newChunk;
- // Find the last safe position to unredact (before any potential partial placeholder)
- // Look for the start of any potential placeholder pattern
- const placeholderStart = combined.lastIndexOf("<");
-
- if (placeholderStart === -1) {
- // No potential placeholder, safe to unredact everything
- return {
- output: unredactSecrets(combined, context),
- remainingBuffer: "",
- };
- }
-
- // Check if there's a complete placeholder after the last <
- const afterStart = combined.slice(placeholderStart);
- const hasCompletePlaceholder = afterStart.includes(">");
+ const partialStart = findPartialPlaceholderStart(combined);
- if (hasCompletePlaceholder) {
- // The placeholder is complete, safe to unredact everything
+ if (partialStart === -1) {
+ // No partial placeholder, safe to unredact everything
return {
output: unredactSecrets(combined, context),
remainingBuffer: "",
}
// Partial placeholder detected, buffer it
- const safeToProcess = combined.slice(0, placeholderStart);
- const toBuffer = combined.slice(placeholderStart);
+ const safeToProcess = combined.slice(0, partialStart);
+ const toBuffer = combined.slice(partialStart);
return {
output: unredactSecrets(safeToProcess, context),
const result = mask("Contact: john@example.com please", entities);
- expect(result.masked).toBe("Contact: <EMAIL_ADDRESS_1> please");
- expect(result.context.mapping["<EMAIL_ADDRESS_1>"]).toBe("john@example.com");
+ expect(result.masked).toBe("Contact: [[EMAIL_ADDRESS_1]] please");
+ expect(result.context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("john@example.com");
});
test("masks multiple entities of same type", () => {
const result = mask(text, entities);
- expect(result.masked).toBe("Emails: <EMAIL_ADDRESS_1> and <EMAIL_ADDRESS_2>");
- expect(result.context.mapping["<EMAIL_ADDRESS_1>"]).toBe("a@b.com");
- expect(result.context.mapping["<EMAIL_ADDRESS_2>"]).toBe("c@d.com");
+ expect(result.masked).toBe("Emails: [[EMAIL_ADDRESS_1]] and [[EMAIL_ADDRESS_2]]");
+ expect(result.context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("a@b.com");
+ expect(result.context.mapping["[[EMAIL_ADDRESS_2]]"]).toBe("c@d.com");
});
test("masks multiple entity types", () => {
const result = mask(text, entities);
- expect(result.masked).toBe("<PERSON_1>: <EMAIL_ADDRESS_1>");
- expect(result.context.mapping["<PERSON_1>"]).toBe("Hans Müller");
- expect(result.context.mapping["<EMAIL_ADDRESS_1>"]).toBe("hans@firma.de");
+ expect(result.masked).toBe("[[PERSON_1]]: [[EMAIL_ADDRESS_1]]");
+ expect(result.context.mapping["[[PERSON_1]]"]).toBe("Hans Müller");
+ expect(result.context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("hans@firma.de");
});
test("reuses placeholder for duplicate values", () => {
const result = mask(text, entities);
// Same value should get same placeholder
- expect(result.masked).toBe("<EMAIL_ADDRESS_1> and again <EMAIL_ADDRESS_1>");
+ expect(result.masked).toBe("[[EMAIL_ADDRESS_1]] and again [[EMAIL_ADDRESS_1]]");
expect(Object.keys(result.context.mapping)).toHaveLength(1);
});
const result = mask(text, entities);
- expect(result.masked).toBe("<PERSON_1><PERSON_2>");
+ expect(result.masked).toBe("[[PERSON_1]][[PERSON_2]]");
});
test("preserves context across calls", () => {
context,
);
- expect(result1.masked).toBe("Email: <EMAIL_ADDRESS_1>");
+ expect(result1.masked).toBe("Email: [[EMAIL_ADDRESS_1]]");
const result2 = mask(
"Another: c@d.com",
);
// Should continue numbering
- expect(result2.masked).toBe("Another: <EMAIL_ADDRESS_2>");
- expect(context.mapping["<EMAIL_ADDRESS_1>"]).toBe("a@b.com");
- expect(context.mapping["<EMAIL_ADDRESS_2>"]).toBe("c@d.com");
+ expect(result2.masked).toBe("Another: [[EMAIL_ADDRESS_2]]");
+ expect(context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("a@b.com");
+ expect(context.mapping["[[EMAIL_ADDRESS_2]]"]).toBe("c@d.com");
});
});
test("restores single placeholder", () => {
const context = createMaskingContext();
- context.mapping["<EMAIL_ADDRESS_1>"] = "john@example.com";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "john@example.com";
- const result = unmask("Reply to <EMAIL_ADDRESS_1>", context, defaultConfig);
+ const result = unmask("Reply to [[EMAIL_ADDRESS_1]]", context, defaultConfig);
expect(result).toBe("Reply to john@example.com");
});
test("restores multiple placeholders", () => {
const context = createMaskingContext();
- context.mapping["<PERSON_1>"] = "Hans Müller";
- context.mapping["<EMAIL_ADDRESS_1>"] = "hans@firma.de";
+ context.mapping["[[PERSON_1]]"] = "Hans Müller";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "hans@firma.de";
const result = unmask(
- "Hello <PERSON_1>, your email <EMAIL_ADDRESS_1> is confirmed",
+ "Hello [[PERSON_1]], your email [[EMAIL_ADDRESS_1]] is confirmed",
context,
defaultConfig,
);
test("restores repeated placeholders", () => {
const context = createMaskingContext();
- context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
- const result = unmask("<EMAIL_ADDRESS_1> and <EMAIL_ADDRESS_1>", context, defaultConfig);
+ const result = unmask("[[EMAIL_ADDRESS_1]] and [[EMAIL_ADDRESS_1]]", context, defaultConfig);
expect(result).toBe("test@test.com and test@test.com");
});
test("adds markers when configured", () => {
const context = createMaskingContext();
- context.mapping["<EMAIL_ADDRESS_1>"] = "john@example.com";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "john@example.com";
- const result = unmask("Email: <EMAIL_ADDRESS_1>", context, configWithMarkers);
+ const result = unmask("Email: [[EMAIL_ADDRESS_1]]", context, configWithMarkers);
expect(result).toBe("Email: [protected]john@example.com");
});
test("handles partial placeholder (no match)", () => {
+ // Only EMAIL_ADDRESS_1 is registered in the mapping, while the input references EMAIL_ADDRESS_2:
+ // a well-formed placeholder with no mapping entry is expected to pass through untouched.
const context = createMaskingContext();
- context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
- const result = unmask("Text with <EMAIL_ADDRESS_2>", context, defaultConfig);
- expect(result).toBe("Text with <EMAIL_ADDRESS_2>"); // No match, unchanged
+ const result = unmask("Text with [[EMAIL_ADDRESS_2]]", context, defaultConfig);
+ expect(result).toBe("Text with [[EMAIL_ADDRESS_2]]"); // No match, unchanged
});
});
expect(masked).not.toContain("+49123456789");
// Simulate LLM response that echoes placeholders
- const llmResponse = `I see your contact info: ${masked.match(/<PERSON_1>/)?.[0]}, email ${masked.match(/<EMAIL_ADDRESS_1>/)?.[0]}`;
+ const llmResponse = `I see your contact info: ${masked.match(/\[\[PERSON_1\]\]/)?.[0]}, email ${masked.match(/\[\[EMAIL_ADDRESS_1\]\]/)?.[0]}`;
const unmasked = unmask(llmResponse, context, defaultConfig);
const { masked, context } = maskMessages(messages, entitiesByMessage);
- expect(masked[0].content).toBe("My email is <EMAIL_ADDRESS_1>");
+ expect(masked[0].content).toBe("My email is [[EMAIL_ADDRESS_1]]");
expect(masked[1].content).toBe("Got it");
- expect(masked[2].content).toBe("Also <EMAIL_ADDRESS_2>");
+ expect(masked[2].content).toBe("Also [[EMAIL_ADDRESS_2]]");
- expect(context.mapping["<EMAIL_ADDRESS_1>"]).toBe("test@example.com");
- expect(context.mapping["<EMAIL_ADDRESS_2>"]).toBe("john@test.com");
+ expect(context.mapping["[[EMAIL_ADDRESS_1]]"]).toBe("test@example.com");
+ expect(context.mapping["[[EMAIL_ADDRESS_2]]"]).toBe("john@test.com");
});
test("preserves message roles", () => {
describe("streaming unmask", () => {
test("unmasks complete placeholder in chunk", () => {
const context = createMaskingContext();
- context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
const { output, remainingBuffer } = unmaskStreamChunk(
"",
- "Hello <EMAIL_ADDRESS_1>!",
+ "Hello [[EMAIL_ADDRESS_1]]!",
context,
defaultConfig,
);
test("buffers partial placeholder", () => {
+ // The chunk stops in the middle of a placeholder name. The text before the opening brackets is
+ // expected in `output`; the unfinished "[[EMAIL_ADD" is expected back in `remainingBuffer`.
const context = createMaskingContext();
- context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
const { output, remainingBuffer } = unmaskStreamChunk(
"",
- "Hello <EMAIL_ADD",
+ "Hello [[EMAIL_ADD",
context,
defaultConfig,
);
expect(output).toBe("Hello ");
- expect(remainingBuffer).toBe("<EMAIL_ADD");
+ expect(remainingBuffer).toBe("[[EMAIL_ADD");
});
test("completes buffered placeholder", () => {
const context = createMaskingContext();
- context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
const { output, remainingBuffer } = unmaskStreamChunk(
- "<EMAIL_ADD",
- "RESS_1> there",
+ "[[EMAIL_ADD",
+ "RESS_1]] there",
context,
defaultConfig,
);
test("flushes remaining buffer", () => {
+ // flushStreamBuffer receives a leftover buffer that is only a prefix of a registered placeholder;
+ // the expectation is that the text is returned verbatim rather than dropped or substituted.
const context = createMaskingContext();
- context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
// Partial that never completes
- const flushed = flushStreamBuffer("<EMAIL_ADD", context, defaultConfig);
+ const flushed = flushStreamBuffer("[[EMAIL_ADD", context, defaultConfig);
// Should return as-is since no complete placeholder
- expect(flushed).toBe("<EMAIL_ADD");
+ expect(flushed).toBe("[[EMAIL_ADD");
});
});
describe("unmaskResponse", () => {
test("unmasks all choices in response", () => {
const context = createMaskingContext();
- context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";
- context.mapping["<PERSON_1>"] = "John Doe";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
+ context.mapping["[[PERSON_1]]"] = "John Doe";
const response = {
id: "chatcmpl-123",
index: 0,
message: {
role: "assistant" as const,
- content: "Contact <PERSON_1> at <EMAIL_ADDRESS_1>",
+ content: "Contact [[PERSON_1]] at [[EMAIL_ADDRESS_1]]",
},
finish_reason: "stop" as const,
},
test("handles multiple choices", () => {
const context = createMaskingContext();
- context.mapping["<EMAIL_ADDRESS_1>"] = "a@b.com";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "a@b.com";
const response = {
id: "chatcmpl-456",
choices: [
{
index: 0,
- message: { role: "assistant" as const, content: "First: <EMAIL_ADDRESS_1>" },
+ message: { role: "assistant" as const, content: "First: [[EMAIL_ADDRESS_1]]" },
finish_reason: "stop" as const,
},
{
index: 1,
- message: { role: "assistant" as const, content: "Second: <EMAIL_ADDRESS_1>" },
+ message: { role: "assistant" as const, content: "Second: [[EMAIL_ADDRESS_1]]" },
finish_reason: "stop" as const,
},
],
const entities: PIIEntity[] = [{ entity_type: "PERSON", start: 9, end: 24, score: 0.9 }];
const { masked, context } = mask(text, entities);
- expect(masked).toBe("Kontakt: <PERSON_1>");
+ expect(masked).toBe("Kontakt: [[PERSON_1]]");
const unmasked = unmask(masked, context, defaultConfig);
expect(unmasked).toBe("Kontakt: François Müller");
test("handles placeholder-like text that is not a real placeholder", () => {
+ // "[[UNKNOWN_1]]" has the placeholder shape but no entry in context.mapping, so unmask is
+ // expected to leave it exactly as written.
const context = createMaskingContext();
- context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
- const result = unmask("Use <UNKNOWN_1> format", context, defaultConfig);
- expect(result).toBe("Use <UNKNOWN_1> format");
+ const result = unmask("Use [[UNKNOWN_1]] format", context, defaultConfig);
+ expect(result).toBe("Use [[UNKNOWN_1]] format");
+ });
+});
+
+// Exercises unmask on responses where [[TYPE_N]] placeholders are embedded in HTML markup:
+// element text, attribute values, mailto hrefs, repeated occurrences, and a nested fragment.
+describe("HTML context handling (issue #36)", () => {
+ test("unmasks placeholders in HTML without encoding issues", () => {
+ // With [[]] format, placeholders are not affected by HTML encoding
+ const context = createMaskingContext();
+ context.mapping["[[PERSON_1]]"] = "Dr. Sarah Chen";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "sarah.chen@hospital.org";
+
+ // [[]] brackets don't get HTML-encoded, so they work directly
+ const htmlResponse = `<p>Contact [[PERSON_1]] at [[EMAIL_ADDRESS_1]]</p>`;
+
+ const result = unmask(htmlResponse, context, defaultConfig);
+
+ // The surrounding <p> tags must survive unchanged; only the placeholders are replaced.
+ expect(result).toBe("<p>Contact Dr. Sarah Chen at sarah.chen@hospital.org</p>");
+ });
+
+ test("unmasks placeholders in HTML title attributes", () => {
+ const context = createMaskingContext();
+ context.mapping["[[PERSON_1]]"] = "Jane Smith";
+
+ // [[]] works in HTML attributes without encoding
+ const htmlWithAttr = `<span title="Contact [[PERSON_1]]">Click here</span>`;
+
+ const result = unmask(htmlWithAttr, context, defaultConfig);
+
+ // The placeholder sits inside a double-quoted attribute value; the quotes and tag stay intact.
+ expect(result).toBe(`<span title="Contact Jane Smith">Click here</span>`);
+ });
+
+ test("unmasks placeholders in mailto links", () => {
+ const context = createMaskingContext();
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@example.com";
+
+ // Placeholder directly follows the "mailto:" scheme with no separator.
+ const mailtoHtml = `<a href="mailto:[[EMAIL_ADDRESS_1]]">Send email</a>`;
+
+ const result = unmask(mailtoHtml, context, defaultConfig);
+
+ expect(result).toBe(`<a href="mailto:test@example.com">Send email</a>`);
+ });
+
+ test("handles multiple occurrences of same placeholder in HTML", () => {
+ const context = createMaskingContext();
+ context.mapping["[[PERSON_1]]"] = "Alice";
+
+ // Same placeholder appears in two separate <p> elements.
+ const response = `<p>[[PERSON_1]] said hello.</p><p>[[PERSON_1]] waved goodbye.</p>`;
+
+ const result = unmask(response, context, defaultConfig);
+
+ expect(result).toBe("<p>Alice said hello.</p><p>Alice waved goodbye.</p>");
+ });
+
+ test("works with complex HTML structures", () => {
+ const context = createMaskingContext();
+ context.mapping["[[PERSON_1]]"] = "Dr. Sarah Chen";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "sarah@hospital.org";
+ context.mapping["[[PHONE_NUMBER_1]]"] = "+1-555-0123";
+
+ // Three placeholder types, each used in element text and two of them also in attribute values.
+ const complexHtml = `
+ <div class="profile">
+ <h1>[[PERSON_1]]</h1>
+ <a href="mailto:[[EMAIL_ADDRESS_1]]">[[EMAIL_ADDRESS_1]]</a>
+ <span data-phone="[[PHONE_NUMBER_1]]">Call: [[PHONE_NUMBER_1]]</span>
+ </div>
+ `;
+
+ const result = unmask(complexHtml, context, defaultConfig);
+
+ // Checks presence of each restored value rather than the exact string, then asserts that
+ // no placeholder delimiters are left anywhere in the output.
+ expect(result).toContain("Dr. Sarah Chen");
+ expect(result).toContain("sarah@hospital.org");
+ expect(result).toContain("+1-555-0123");
+ expect(result).not.toContain("[[");
+ expect(result).not.toContain("]]");
+ });
+});
+
+// Exercises unmaskStreamChunk(buffer, newChunk, context, config) with [[TYPE_N]] placeholders:
+// complete in one chunk, cut mid-name, resumed from a buffer, and cut between the closing brackets.
+describe("streaming with [[]] placeholders (issue #36)", () => {
+ test("handles complete placeholder in chunk", () => {
+ const context = createMaskingContext();
+ context.mapping["[[PERSON_1]]"] = "John Doe";
+
+ // Empty starting buffer; the chunk holds the whole placeholder, so nothing should be held back.
+ const { output, remainingBuffer } = unmaskStreamChunk(
+ "",
+ "Hello [[PERSON_1]]!",
+ context,
+ defaultConfig,
+ );
+
+ expect(output).toBe("Hello John Doe!");
+ expect(remainingBuffer).toBe("");
+ });
+
+ test("buffers partial placeholder at end of chunk", () => {
+ const context = createMaskingContext();
+ context.mapping["[[PERSON_1]]"] = "John Doe";
+
+ // Partial placeholder at end: [[PERS
+ const { output, remainingBuffer } = unmaskStreamChunk(
+ "",
+ "Hello [[PERS",
+ context,
+ defaultConfig,
+ );
+
+ // Text before the opening brackets is emitted; the unfinished tail is returned for the next call.
+ expect(output).toBe("Hello ");
+ expect(remainingBuffer).toBe("[[PERS");
+ });
+
+ test("completes buffered placeholder across chunks", () => {
+ const context = createMaskingContext();
+ context.mapping["[[PERSON_1]]"] = "John Doe";
+
+ // The first argument carries the tail held back earlier; the new chunk supplies the rest.
+ const { output, remainingBuffer } = unmaskStreamChunk(
+ "[[PERS",
+ "ON_1]] there",
+ context,
+ defaultConfig,
+ );
+
+ expect(output).toBe("John Doe there");
+ expect(remainingBuffer).toBe("");
+ });
+
+ test("handles placeholder split at closing brackets", () => {
+ const context = createMaskingContext();
+ context.mapping["[[PERSON_1]]"] = "John Doe";
+
+ // First chunk ends with incomplete closing
+ const result1 = unmaskStreamChunk("", "Hello [[PERSON_1]", context, defaultConfig);
+ expect(result1.output).toBe("Hello ");
+ expect(result1.remainingBuffer).toBe("[[PERSON_1]");
+
+ // Second chunk completes it
+ // (result1.remainingBuffer is passed back in as the buffer argument of the next call)
+ const result2 = unmaskStreamChunk(result1.remainingBuffer, "] world", context, defaultConfig);
+ expect(result2.output).toBe("John Doe world");
+ expect(result2.remainingBuffer).toBe("");
});
});
import type { MaskingConfig } from "../config";
+import {
+ findPartialPlaceholderStart,
+ generatePlaceholder as generatePlaceholderFromFormat,
+ PII_PLACEHOLDER_FORMAT,
+} from "../constants/placeholders";
import { extractTextContent } from "../utils/content";
import type { ChatCompletionResponse, ChatMessage } from "./llm-client";
import type { PIIEntity } from "./pii-detector";
};
}
-const PLACEHOLDER_FORMAT = "<{TYPE}_{N}>";
-
/**
* Generates a placeholder for a PII entity type
*/
const count = (context.counters[entityType] || 0) + 1;
context.counters[entityType] = count;
- return PLACEHOLDER_FORMAT.replace("{TYPE}", entityType).replace("{N}", String(count));
+ return generatePlaceholderFromFormat(PII_PLACEHOLDER_FORMAT, entityType, count);
}
/**
): { output: string; remainingBuffer: string } {
const combined = buffer + newChunk;
- // Find the last safe position to unmask (before any potential partial placeholder)
- // Look for the start of any potential placeholder pattern
- const placeholderStart = combined.lastIndexOf("<");
-
- if (placeholderStart === -1) {
- // No potential placeholder, safe to unmask everything
- return {
- output: unmask(combined, context, config),
- remainingBuffer: "",
- };
- }
-
- // Check if there's a complete placeholder after the last <
- const afterStart = combined.slice(placeholderStart);
- const hasCompletePlaceholder = afterStart.includes(">");
+ const partialStart = findPartialPlaceholderStart(combined);
- if (hasCompletePlaceholder) {
- // The placeholder is complete, safe to unmask everything
+ if (partialStart === -1) {
+ // No partial placeholder, safe to unmask everything
return {
output: unmask(combined, context, config),
remainingBuffer: "",
}
// Partial placeholder detected, buffer it
- const safeToProcess = combined.slice(0, placeholderStart);
- const toBuffer = combined.slice(placeholderStart);
+ const safeToProcess = combined.slice(0, partialStart);
+ const toBuffer = combined.slice(partialStart);
return {
output: unmask(safeToProcess, context, config),
describe("createUnmaskingStream", () => {
test("unmasks complete placeholder in single chunk", async () => {
const context = createMaskingContext();
- context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
- const sseData = `data: {"choices":[{"delta":{"content":"Hello <EMAIL_ADDRESS_1>!"}}]}\n\n`;
+ const sseData = `data: {"choices":[{"delta":{"content":"Hello [[EMAIL_ADDRESS_1]]!"}}]}\n\n`;
const source = createSSEStream([sseData]);
const unmaskedStream = createUnmaskingStream(source, context, defaultConfig);
test("buffers partial placeholder across chunks", async () => {
const context = createMaskingContext();
- context.mapping["<EMAIL_ADDRESS_1>"] = "a@b.com";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "a@b.com";
// Split placeholder across chunks
const chunks = [
- `data: {"choices":[{"delta":{"content":"Hello <EMAIL_"}}]}\n\n`,
- `data: {"choices":[{"delta":{"content":"ADDRESS_1> world"}}]}\n\n`,
+ `data: {"choices":[{"delta":{"content":"Hello [[EMAIL_"}}]}\n\n`,
+ `data: {"choices":[{"delta":{"content":"ADDRESS_1]] world"}}]}\n\n`,
];
const source = createSSEStream(chunks);
test("flushes remaining buffer on stream end", async () => {
const context = createMaskingContext();
- context.mapping["<EMAIL_ADDRESS_1>"] = "test@test.com";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "test@test.com";
// Partial placeholder that completes only on flush
- const chunks = [`data: {"choices":[{"delta":{"content":"Contact <EMAIL_ADDRESS_1>"}}]}\n\n`];
+ const chunks = [`data: {"choices":[{"delta":{"content":"Contact [[EMAIL_ADDRESS_1]]"}}]}\n\n`];
const source = createSSEStream(chunks);
const unmaskedStream = createUnmaskingStream(source, context, defaultConfig);
test("handles multiple placeholders in stream", async () => {
const context = createMaskingContext();
- context.mapping["<PERSON_1>"] = "John";
- context.mapping["<EMAIL_ADDRESS_1>"] = "john@test.com";
+ context.mapping["[[PERSON_1]]"] = "John";
+ context.mapping["[[EMAIL_ADDRESS_1]]"] = "john@test.com";
- const sseData = `data: {"choices":[{"delta":{"content":"<PERSON_1>: <EMAIL_ADDRESS_1>"}}]}\n\n`;
+ const sseData = `data: {"choices":[{"delta":{"content":"[[PERSON_1]]: [[EMAIL_ADDRESS_1]]"}}]}\n\n`;
const source = createSSEStream([sseData]);
const unmaskedStream = createUnmaskingStream(source, context, defaultConfig);
.replace(/&/g, '&')
.replace(/</g, '<')
.replace(/>/g, '>')
- .replace(/<([A-Z_]+_\\d+)>/g, '<span class="bg-accent-bg text-accent px-1 py-0.5 rounded-sm font-medium"><$1></span>');
+ .replace(/\\[\\[([A-Z_]+_\\d+)\\]\\]/g, '<span class="bg-accent-bg text-accent px-1 py-0.5 rounded-sm font-medium">[[$1]]</span>');
}
if (!entities || entities.length === 0) {
return '<span class="text-text-muted">No PII detected in this request</span>';