From: maximiliancw Date: Fri, 9 Jan 2026 15:02:22 +0000 (+0100) Subject: Add reversible redaction module for secret masking: X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=1d8b469b92bf2da08a30fed06fc677e3922ec734;p=sgasser-llm-shield.git Add reversible redaction module for secret masking: - Create redact.ts with RedactionContext for tracking secret mappings - Implement redactSecrets() with configurable placeholder format - Implement unredactSecrets() for restoring original secrets in responses - Add streaming helpers for unredacting SSE responses - Add comprehensive tests covering roundtrip, multiple messages, and streaming --- diff --git a/src/secrets/redact.test.ts b/src/secrets/redact.test.ts new file mode 100644 index 0000000..a1b01ec --- /dev/null +++ b/src/secrets/redact.test.ts @@ -0,0 +1,399 @@ +import { describe, expect, test } from "bun:test"; +import type { SecretsDetectionConfig } from "../config"; +import type { SecretsRedaction } from "./detect"; +import { + createRedactionContext, + flushRedactionBuffer, + redactMessagesSecrets, + redactSecrets, + unredactResponse, + unredactSecrets, + unredactStreamChunk, +} from "./redact"; + +const defaultConfig: SecretsDetectionConfig = { + enabled: true, + action: "redact", + entities: ["OPENSSH_PRIVATE_KEY", "PEM_PRIVATE_KEY", "API_KEY_OPENAI"], + max_scan_chars: 200000, + redact_placeholder: "", + log_detected_types: true, +}; + +const sampleSecret = "sk-proj-abc123def456ghi789jkl012mno345pqr678stu901vwx"; + +describe("redactSecrets", () => { + test("returns original text when no redactions", () => { + const text = "Hello world"; + const result = redactSecrets(text, [], defaultConfig); + expect(result.redacted).toBe("Hello world"); + expect(Object.keys(result.context.mapping)).toHaveLength(0); + }); + + test("redacts single secret", () => { + const text = `My API key is ${sampleSecret}`; + const redactions: SecretsRedaction[] = [ + { start: 14, end: 14 + sampleSecret.length, type: "API_KEY_OPENAI" }, + 
]; + const result = redactSecrets(text, redactions, defaultConfig); + + expect(result.redacted).toBe("My API key is "); + expect(result.context.mapping[""]).toBe(sampleSecret); + }); + + test("redacts multiple secrets of same type", () => { + const text = `Key1: ${sampleSecret} Key2: ${sampleSecret}`; + const redactions: SecretsRedaction[] = [ + { start: 6, end: 6 + sampleSecret.length, type: "API_KEY_OPENAI" }, + { + start: 6 + sampleSecret.length + 7, + end: 6 + sampleSecret.length * 2 + 7, + type: "API_KEY_OPENAI", + }, + ]; + const result = redactSecrets(text, redactions, defaultConfig); + + // Same secret value should get same placeholder + expect(result.redacted).toBe( + "Key1: Key2: ", + ); + expect(Object.keys(result.context.mapping)).toHaveLength(1); + }); + + test("redacts multiple secrets of different types", () => { + const awsKey = "AKIAIOSFODNN7EXAMPLE"; + const text = `OpenAI: ${sampleSecret} AWS: ${awsKey}`; + const redactions: SecretsRedaction[] = [ + { start: 8, end: 8 + sampleSecret.length, type: "API_KEY_OPENAI" }, + { + start: 8 + sampleSecret.length + 6, + end: 8 + sampleSecret.length + 6 + awsKey.length, + type: "API_KEY_AWS", + }, + ]; + const result = redactSecrets(text, redactions, defaultConfig); + + expect(result.redacted).toContain(""); + expect(result.redacted).toContain(""); + expect(Object.keys(result.context.mapping)).toHaveLength(2); + }); + + test("preserves context across multiple calls", () => { + const context = createRedactionContext(); + const text1 = `Key: ${sampleSecret}`; + const redactions1: SecretsRedaction[] = [ + { start: 5, end: 5 + sampleSecret.length, type: "API_KEY_OPENAI" }, + ]; + redactSecrets(text1, redactions1, defaultConfig, context); + + const anotherSecret = "sk-proj-xyz789abc123def456ghi789jkl012mno345pqr678"; + const text2 = `Another: ${anotherSecret}`; + const redactions2: SecretsRedaction[] = [ + { start: 9, end: 9 + anotherSecret.length, type: "API_KEY_OPENAI" }, + ]; + const result2 = 
redactSecrets(text2, redactions2, defaultConfig, context); + + // Second secret should get incremented counter + expect(result2.redacted).toBe("Another: "); + expect(Object.keys(context.mapping)).toHaveLength(2); + }); + + test("handles custom placeholder format", () => { + const customConfig: SecretsDetectionConfig = { + ...defaultConfig, + redact_placeholder: "[REDACTED:{N}]", + }; + const text = `Key: ${sampleSecret}`; + const redactions: SecretsRedaction[] = [ + { start: 5, end: 5 + sampleSecret.length, type: "API_KEY_OPENAI" }, + ]; + const result = redactSecrets(text, redactions, customConfig); + + expect(result.redacted).toBe("Key: [REDACTED:API_KEY_OPENAI_1]"); + }); +}); + +describe("unredactSecrets", () => { + test("returns original text when no mappings", () => { + const context = createRedactionContext(); + const text = "Hello world"; + const result = unredactSecrets(text, context); + expect(result).toBe("Hello world"); + }); + + test("restores single secret", () => { + const context = createRedactionContext(); + context.mapping[""] = sampleSecret; + + const text = "My API key is "; + const result = unredactSecrets(text, context); + + expect(result).toBe(`My API key is ${sampleSecret}`); + }); + + test("restores multiple secrets", () => { + const context = createRedactionContext(); + const awsKey = "AKIAIOSFODNN7EXAMPLE"; + context.mapping[""] = sampleSecret; + context.mapping[""] = awsKey; + + const text = "OpenAI: AWS: "; + const result = unredactSecrets(text, context); + + expect(result).toBe(`OpenAI: ${sampleSecret} AWS: ${awsKey}`); + }); + + test("restores repeated placeholders", () => { + const context = createRedactionContext(); + context.mapping[""] = sampleSecret; + + const text = + "Key1: Key2: "; + const result = unredactSecrets(text, context); + + expect(result).toBe(`Key1: ${sampleSecret} Key2: ${sampleSecret}`); + }); +}); + +describe("redact -> unredact roundtrip", () => { + test("preserves original data through roundtrip", () => { + 
const originalText = ` +Here are my credentials: +OpenAI API Key: ${sampleSecret} +Please store them securely. +`; + const redactions: SecretsRedaction[] = [ + { + start: originalText.indexOf(sampleSecret), + end: originalText.indexOf(sampleSecret) + sampleSecret.length, + type: "API_KEY_OPENAI", + }, + ]; + + const { redacted, context } = redactSecrets(originalText, redactions, defaultConfig); + + // Verify secret is not in redacted text + expect(redacted).not.toContain(sampleSecret); + expect(redacted).toContain(""); + + // Unredact and verify original is restored + const restored = unredactSecrets(redacted, context); + expect(restored).toBe(originalText); + }); + + test("handles empty redactions array", () => { + const text = "No secrets here"; + const { redacted, context } = redactSecrets(text, [], defaultConfig); + const restored = unredactSecrets(redacted, context); + expect(restored).toBe(text); + }); +}); + +describe("redactMessagesSecrets", () => { + test("redacts secrets in multiple messages", () => { + const messages = [ + { role: "user" as const, content: `My key is ${sampleSecret}` }, + { role: "assistant" as const, content: "I'll help you with that." 
}, + ]; + const redactionsByMessage: SecretsRedaction[][] = [ + [{ start: 10, end: 10 + sampleSecret.length, type: "API_KEY_OPENAI" }], + [], + ]; + + const { redacted, context } = redactMessagesSecrets( + messages, + redactionsByMessage, + defaultConfig, + ); + + expect(redacted[0].content).toContain(""); + expect(redacted[0].content).not.toContain(sampleSecret); + expect(redacted[1].content).toBe("I'll help you with that."); + expect(Object.keys(context.mapping)).toHaveLength(1); + }); + + test("preserves message roles", () => { + const messages = [ + { role: "system" as const, content: "You are helpful" }, + { role: "user" as const, content: `Key: ${sampleSecret}` }, + ]; + const redactionsByMessage: SecretsRedaction[][] = [ + [], + [{ start: 5, end: 5 + sampleSecret.length, type: "API_KEY_OPENAI" }], + ]; + + const { redacted } = redactMessagesSecrets(messages, redactionsByMessage, defaultConfig); + + expect(redacted[0].role).toBe("system"); + expect(redacted[1].role).toBe("user"); + }); + + test("shares context across messages", () => { + const messages = [ + { role: "user" as const, content: `Key1: ${sampleSecret}` }, + { role: "user" as const, content: `Key2: ${sampleSecret}` }, + ]; + const redactionsByMessage: SecretsRedaction[][] = [ + [{ start: 6, end: 6 + sampleSecret.length, type: "API_KEY_OPENAI" }], + [{ start: 6, end: 6 + sampleSecret.length, type: "API_KEY_OPENAI" }], + ]; + + const { redacted, context } = redactMessagesSecrets( + messages, + redactionsByMessage, + defaultConfig, + ); + + // Same secret should get same placeholder across messages + expect(redacted[0].content).toBe("Key1: "); + expect(redacted[1].content).toBe("Key2: "); + expect(Object.keys(context.mapping)).toHaveLength(1); + }); +}); + +describe("streaming unredact", () => { + test("unredacts complete placeholder in chunk", () => { + const context = createRedactionContext(); + context.mapping[""] = sampleSecret; + + const { output, remainingBuffer } = unredactStreamChunk( + "", + 
"Key: end", + context, + ); + + expect(output).toBe(`Key: ${sampleSecret} end`); + expect(remainingBuffer).toBe(""); + }); + + test("buffers partial placeholder", () => { + const context = createRedactionContext(); + context.mapping[""] = sampleSecret; + + const { output, remainingBuffer } = unredactStreamChunk("", "Key: { + const context = createRedactionContext(); + context.mapping[""] = sampleSecret; + + const { output, remainingBuffer } = unredactStreamChunk( + " done", + context, + ); + + expect(output).toBe(`${sampleSecret} done`); + expect(remainingBuffer).toBe(""); + }); + + test("handles text without placeholders", () => { + const context = createRedactionContext(); + + const { output, remainingBuffer } = unredactStreamChunk("", "Hello world", context); + + expect(output).toBe("Hello world"); + expect(remainingBuffer).toBe(""); + }); + + test("flushes remaining buffer", () => { + const context = createRedactionContext(); + context.mapping[""] = sampleSecret; + + const result = flushRedactionBuffer(" { + const context = createRedactionContext(); + const result = flushRedactionBuffer("", context); + expect(result).toBe(""); + }); +}); + +describe("unredactResponse", () => { + test("unredacts all choices in response", () => { + const context = createRedactionContext(); + context.mapping[""] = sampleSecret; + + const response = { + id: "test", + object: "chat.completion" as const, + created: Date.now(), + model: "gpt-4", + choices: [ + { + index: 0, + message: { + role: "assistant" as const, + content: "Your key is ", + }, + finish_reason: "stop" as const, + }, + ], + }; + + const result = unredactResponse(response, context); + expect(result.choices[0].message.content).toBe(`Your key is ${sampleSecret}`); + }); + + test("handles multiple choices", () => { + const context = createRedactionContext(); + context.mapping[""] = sampleSecret; + + const response = { + id: "test", + object: "chat.completion" as const, + created: Date.now(), + model: "gpt-4", + choices: 
[ + { + index: 0, + message: { + role: "assistant" as const, + content: "Choice 1: ", + }, + finish_reason: "stop" as const, + }, + { + index: 1, + message: { + role: "assistant" as const, + content: "Choice 2: ", + }, + finish_reason: "stop" as const, + }, + ], + }; + + const result = unredactResponse(response, context); + expect(result.choices[0].message.content).toBe(`Choice 1: ${sampleSecret}`); + expect(result.choices[1].message.content).toBe(`Choice 2: ${sampleSecret}`); + }); + + test("preserves response structure", () => { + const context = createRedactionContext(); + const response = { + id: "test-id", + object: "chat.completion" as const, + created: 12345, + model: "gpt-4-turbo", + choices: [ + { + index: 0, + message: { role: "assistant" as const, content: "Hello" }, + finish_reason: "stop" as const, + }, + ], + usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }, + }; + + const result = unredactResponse(response, context); + expect(result.id).toBe("test-id"); + expect(result.model).toBe("gpt-4-turbo"); + expect(result.usage).toEqual({ prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 }); + }); +}); diff --git a/src/secrets/redact.ts b/src/secrets/redact.ts new file mode 100644 index 0000000..3a2c2a3 --- /dev/null +++ b/src/secrets/redact.ts @@ -0,0 +1,225 @@ +import type { SecretsDetectionConfig } from "../config"; +import type { ChatCompletionResponse, ChatMessage } from "../services/llm-client"; +import type { SecretsRedaction } from "./detect"; + +/** + * Context for tracking secret redaction mappings + * Similar to MaskingContext for PII but for secrets + */ +export interface RedactionContext { + /** Maps placeholder -> original secret */ + mapping: Record; + /** Maps original secret -> placeholder */ + reverseMapping: Record; + /** Counter per secret type for sequential numbering */ + counters: Record; +} + +export interface RedactionResult { + redacted: string; + context: RedactionContext; +} + +/** + * Creates a new 
/**
 * Creates a new, empty redaction context for a request.
 *
 * @returns A context whose placeholder map, reverse map and per-type counters are all empty.
 */
export function createRedactionContext(): RedactionContext {
  return { mapping: {}, reverseMapping: {}, counters: {} };
}

/** Token inside `redact_placeholder` that is substituted with the secret's identifier. */
const PLACEHOLDER_TOKEN = "{N}";

/** Matches every character that has a special meaning inside a regular expression. */
const REGEXP_SPECIAL_CHARS = /[.*+?^${}()|[\]\\]/g;

/**
 * Reads `record[key]` only when `key` is an own property, so keys such as
 * "constructor" or "toString" never resolve to members of `Object.prototype`.
 */
function ownValue<T>(record: Record<string, T>, key: string): T | undefined {
  return Object.prototype.hasOwnProperty.call(record, key) ? record[key] : undefined;
}

/** Escapes `value` so it can be embedded literally in a `RegExp` source. */
function escapeRegExp(value: string): string {
  return value.replace(REGEXP_SPECIAL_CHARS, "\\$&");
}

/**
 * Returns the non-empty placeholders known to `context`, longest first, so that a
 * placeholder which is a prefix of another one (`..._1` vs `..._10`) never wins.
 */
function placeholdersLongestFirst(context: RedactionContext): string[] {
  return Object.keys(context.mapping)
    .filter((placeholder) => placeholder.length > 0)
    .sort((a, b) => b.length - a.length);
}

/**
 * Generates a placeholder for a secret type using the configured format.
 *
 * Every `{N}` in `config.redact_placeholder` is replaced with `<secretType>_<count>`,
 * where `count` is a per-type sequential number stored in `context.counters`.
 * When the format contains no `{N}`, the identifier is appended instead: otherwise
 * every secret would share one placeholder (or an empty one) and the mapping could
 * not be reversed.
 *
 * @param secretType - Detected type of the secret (used as counter key and in the id)
 * @param context - Redaction context; its counter for `secretType` is incremented
 * @param config - Secrets detection configuration providing `redact_placeholder`
 * @returns A non-empty placeholder unique to this (type, count) pair
 */
function generatePlaceholder(
  secretType: string,
  context: RedactionContext,
  config: SecretsDetectionConfig,
): string {
  const count = (ownValue(context.counters, secretType) ?? 0) + 1;
  context.counters[secretType] = count;

  const id = `${secretType}_${count}`;
  const format = config.redact_placeholder;

  // split/join replaces every occurrence and treats `id` literally (no `$` patterns).
  return format.includes(PLACEHOLDER_TOKEN)
    ? format.split(PLACEHOLDER_TOKEN).join(id)
    : `${format}${id}`;
}

/**
 * Produces clean, non-overlapping spans in ascending order.
 *
 * Spans are copied (the caller's array and objects are not mutated), clamped to
 * `[0, textLength]`, emptied spans are dropped, and overlapping spans are merged into
 * one span that keeps the type of the earliest-starting one. Merging rather than
 * dropping guarantees that no character covered by any input span survives.
 */
function normalizeRedactions(
  redactions: readonly SecretsRedaction[],
  textLength: number,
): SecretsRedaction[] {
  const clamp = (position: number): number => Math.min(Math.max(position, 0), textLength);

  const ordered = redactions
    .map((redaction) => ({
      ...redaction,
      start: clamp(redaction.start),
      end: clamp(redaction.end),
    }))
    .filter((redaction) => redaction.end > redaction.start)
    .sort((a, b) => a.start - b.start);

  const merged: SecretsRedaction[] = [];
  for (const span of ordered) {
    const previous = merged[merged.length - 1];
    if (previous !== undefined && span.start < previous.end) {
      previous.end = Math.max(previous.end, span.end);
    } else {
      merged.push(span);
    }
  }
  return merged;
}

/**
 * Redacts secrets in text, replacing them with placeholders.
 *
 * The placeholder <-> secret mapping is stored in the context for later unredaction.
 * Placeholders are assigned in order of appearance, and an identical secret value
 * always reuses the placeholder it already has in the context. Redactions may be
 * passed in any order; overlapping, out-of-range or empty spans are normalized first.
 *
 * @param text - The text to redact secrets from
 * @param redactions - Spans (`start` inclusive, `end` exclusive) to redact, with their type
 * @param config - Secrets detection configuration
 * @param context - Optional existing context to reuse (for multiple messages)
 * @returns The redacted text together with the (possibly updated) context
 */
export function redactSecrets(
  text: string,
  redactions: SecretsRedaction[],
  config: SecretsDetectionConfig,
  context?: RedactionContext,
): RedactionResult {
  const ctx = context ?? createRedactionContext();

  if (redactions.length === 0) {
    return { redacted: text, context: ctx };
  }

  // Single forward pass: copy the plain text between spans and emit a placeholder per span.
  const pieces: string[] = [];
  let cursor = 0;

  for (const span of normalizeRedactions(redactions, text.length)) {
    const secret = text.slice(span.start, span.end);

    let placeholder = ownValue(ctx.reverseMapping, secret);
    if (placeholder === undefined) {
      placeholder = generatePlaceholder(span.type, ctx, config);
      ctx.mapping[placeholder] = secret;
      ctx.reverseMapping[secret] = placeholder;
    }

    pieces.push(text.slice(cursor, span.start), placeholder);
    cursor = span.end;
  }
  pieces.push(text.slice(cursor));

  return { redacted: pieces.join(""), context: ctx };
}

/**
 * Unredacts text by replacing placeholders with original secrets.
 *
 * All placeholders are replaced in one left-to-right pass, so text inside a restored
 * secret is never scanned again (a secret that happens to contain another placeholder
 * stays intact). Empty placeholder keys are ignored.
 *
 * @param text - Text containing secret placeholders
 * @param context - Redaction context with mappings
 * @returns `text` with every known placeholder replaced by its original secret
 */
export function unredactSecrets(text: string, context: RedactionContext): string {
  const placeholders = placeholdersLongestFirst(context);
  if (placeholders.length === 0 || text.length === 0) {
    return text;
  }

  const pattern = new RegExp(placeholders.map(escapeRegExp).join("|"), "g");
  // Function replacement keeps `$` sequences inside secrets literal.
  return text.replace(pattern, (match) => ownValue(context.mapping, match) ?? match);
}

/**
 * Redacts secrets in multiple messages (for chat completions).
 *
 * One context is shared by all messages, so the same secret gets the same
 * placeholder everywhere. Messages are copied, not mutated.
 *
 * @param messages - Chat messages to redact
 * @param redactionsByMessage - Redactions for each message (indexed by message position)
 * @param config - Secrets detection configuration
 * @returns The redacted message copies and the context holding all mappings
 */
export function redactMessagesSecrets(
  messages: ChatMessage[],
  redactionsByMessage: SecretsRedaction[][],
  config: SecretsDetectionConfig,
): { redacted: ChatMessage[]; context: RedactionContext } {
  const context = createRedactionContext();

  const redacted = messages.map((message, index) => {
    const { redacted: content } = redactSecrets(
      message.content,
      redactionsByMessage[index] ?? [],
      config,
      context,
    );
    return { ...message, content };
  });

  return { redacted, context };
}

/**
 * Finds where a trailing, still-incomplete placeholder begins in `text`.
 *
 * The text is walked left to right, jumping over complete placeholders so that the
 * closing delimiter of a finished placeholder is never mistaken for the opening of a
 * new one. A position is reported when everything from it to the end of the text is
 * a proper prefix of some placeholder.
 *
 * @param text - Buffered text plus the newest chunk
 * @param placeholders - Non-empty placeholders, longest first
 * @returns Index of the partial placeholder, or `text.length` when there is none
 */
function findPartialPlaceholderStart(text: string, placeholders: readonly string[]): number {
  const longest = placeholders.reduce((max, placeholder) => Math.max(max, placeholder.length), 0);

  let index = 0;
  while (index < text.length) {
    const remaining = text.length - index;

    // Only a tail shorter than the longest placeholder can still grow into one.
    if (remaining < longest) {
      const tail = text.slice(index);
      const mayGrow = placeholders.some(
        (placeholder) => placeholder.length > remaining && placeholder.startsWith(tail),
      );
      if (mayGrow) {
        return index;
      }
    }

    const complete = placeholders.find((placeholder) => text.startsWith(placeholder, index));
    index += complete === undefined ? 1 : complete.length;
  }

  return text.length;
}

/**
 * Streaming unredact helper - processes chunks and unredacts complete placeholders.
 *
 * The previous buffer and the new chunk are joined; any trailing text that could
 * still become one of the context's placeholders is held back, everything before it
 * is unredacted and returned. Detection is based on the actual placeholders in
 * `context.mapping`, so it works for any configured placeholder format and does not
 * stall on unrelated characters.
 *
 * @param buffer - Text held back by the previous call ("" on the first call)
 * @param newChunk - Newly received text
 * @param context - Redaction context with mappings
 * @returns `output` to emit now and `remainingBuffer` to pass to the next call
 */
export function unredactStreamChunk(
  buffer: string,
  newChunk: string,
  context: RedactionContext,
): { output: string; remainingBuffer: string } {
  const combined = buffer + newChunk;
  const placeholders = placeholdersLongestFirst(context);

  if (placeholders.length === 0) {
    // Nothing was redacted, so nothing can be split across chunks.
    return { output: combined, remainingBuffer: "" };
  }

  const holdFrom = findPartialPlaceholderStart(combined, placeholders);

  return {
    output: unredactSecrets(combined.slice(0, holdFrom), context),
    remainingBuffer: combined.slice(holdFrom),
  };
}

/**
 * Flushes remaining buffer at end of stream.
 *
 * @param buffer - Text still held back by `unredactStreamChunk`
 * @param context - Redaction context with mappings
 * @returns The unredacted buffer, or "" when the buffer is empty
 */
export function flushRedactionBuffer(buffer: string, context: RedactionContext): string {
  return buffer ? unredactSecrets(buffer, context) : "";
}

/**
 * Unredacts a chat completion response by replacing placeholders in all choices.
 *
 * The response is copied, not mutated; all fields other than each choice's
 * `message.content` are carried over unchanged.
 *
 * @param response - Chat completion response possibly containing placeholders
 * @param context - Redaction context with mappings
 * @returns A copy of `response` with secrets restored in every choice
 */
export function unredactResponse(
  response: ChatCompletionResponse,
  context: RedactionContext,
): ChatCompletionResponse {
  return {
    ...response,
    choices: response.choices.map((choice) => {
      const { content } = choice.message;
      return {
        ...choice,
        message: {
          ...choice.message,
          // NOTE(review): defensive - OpenAI-compatible responses can presumably carry
          // non-string (null) content, e.g. for tool calls; pass such values through.
          content: typeof content === "string" ? unredactSecrets(content, context) : content,
        },
      };
    }),
  };
}