From: maximiliancw Date: Fri, 9 Jan 2026 14:52:56 +0000 (+0100) Subject: feat(secrets): add detection for API keys, JWT tokens, and Bearer tokens X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=c4c90052d3793351ed852d796a620febee14baf4;p=sgasser-llm-shield.git feat(secrets): add detection for API keys, JWT tokens, and Bearer tokens - Add new secret entity types: API_KEY_OPENAI, API_KEY_AWS, API_KEY_GITHUB, JWT_TOKEN, BEARER_TOKEN - Extract pattern detection into reusable helper function - Add comprehensive tests for all new secret types with false positive checks - Update config schema with typed entity enum --- diff --git a/src/config.ts b/src/config.ts index 93403dc..426456f 100644 --- a/src/config.ts +++ b/src/config.ts @@ -92,10 +92,21 @@ const DashboardSchema = z.object({ auth: DashboardAuthSchema.optional(), }); +// All supported secret entity types +const SecretEntityTypes = [ + "OPENSSH_PRIVATE_KEY", + "PEM_PRIVATE_KEY", + "API_KEY_OPENAI", + "API_KEY_AWS", + "API_KEY_GITHUB", + "JWT_TOKEN", + "BEARER_TOKEN", +] as const; + const SecretsDetectionSchema = z.object({ enabled: z.boolean().default(true), action: z.enum(["block", "redact", "route_local"]).default("block"), - entities: z.array(z.string()).default(["OPENSSH_PRIVATE_KEY", "PEM_PRIVATE_KEY"]), + entities: z.array(z.enum(SecretEntityTypes)).default(["OPENSSH_PRIVATE_KEY", "PEM_PRIVATE_KEY"]), max_scan_chars: z.coerce.number().int().min(0).default(200000), redact_placeholder: z.string().default(""), log_detected_types: z.boolean().default(true), diff --git a/src/secrets/detect.test.ts b/src/secrets/detect.test.ts index 581f5f1..18ba9fc 100644 --- a/src/secrets/detect.test.ts +++ b/src/secrets/detect.test.ts @@ -182,6 +182,197 @@ describe("detectSecrets", () => { }); }); +// Test data for new secret types +const openaiApiKey = "sk-proj-abc123def456ghi789jkl012mno345pqr678stu901vwx"; +const awsAccessKey = "AKIAIOSFODNN7EXAMPLE"; +const githubToken = "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx1234"; 
+const githubOAuthToken = "gho_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx5678"; +const jwtToken = + "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c"; +const bearerToken = "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9abcdefghijk"; + +describe("detectSecrets - API Keys", () => { + const apiKeyConfig: SecretsDetectionConfig = { + ...defaultConfig, + entities: ["API_KEY_OPENAI", "API_KEY_AWS", "API_KEY_GITHUB"], + }; + + test("detects OpenAI API key", () => { + const text = `My API key is ${openaiApiKey}`; + const result = detectSecrets(text, apiKeyConfig); + expect(result.detected).toBe(true); + expect(result.matches).toHaveLength(1); + expect(result.matches[0].type).toBe("API_KEY_OPENAI"); + expect(result.matches[0].count).toBe(1); + expect(result.redactions).toBeDefined(); + expect(result.redactions?.[0].type).toBe("API_KEY_OPENAI"); + }); + + test("detects AWS access key", () => { + const text = `AWS key: ${awsAccessKey}`; + const result = detectSecrets(text, apiKeyConfig); + expect(result.detected).toBe(true); + expect(result.matches).toHaveLength(1); + expect(result.matches[0].type).toBe("API_KEY_AWS"); + expect(result.matches[0].count).toBe(1); + }); + + test("detects GitHub personal access token", () => { + const text = `export GITHUB_TOKEN=${githubToken}`; + const result = detectSecrets(text, apiKeyConfig); + expect(result.detected).toBe(true); + expect(result.matches).toHaveLength(1); + expect(result.matches[0].type).toBe("API_KEY_GITHUB"); + }); + + test("detects GitHub OAuth token", () => { + const text = `OAuth: ${githubOAuthToken}`; + const result = detectSecrets(text, apiKeyConfig); + expect(result.detected).toBe(true); + expect(result.matches).toHaveLength(1); + expect(result.matches[0].type).toBe("API_KEY_GITHUB"); + }); + + test("detects multiple API keys of different types", () => { + const text = `OpenAI: ${openaiApiKey}\nAWS: 
${awsAccessKey}\nGitHub: ${githubToken}`; + const result = detectSecrets(text, apiKeyConfig); + expect(result.detected).toBe(true); + expect(result.matches).toHaveLength(3); + expect(result.matches.find((m) => m.type === "API_KEY_OPENAI")).toBeDefined(); + expect(result.matches.find((m) => m.type === "API_KEY_AWS")).toBeDefined(); + expect(result.matches.find((m) => m.type === "API_KEY_GITHUB")).toBeDefined(); + }); + + test("avoids false positive - sk- prefix but too short", () => { + const text = "This sk-short is not a valid key"; + const result = detectSecrets(text, apiKeyConfig); + expect(result.detected).toBe(false); + }); + + test("avoids false positive - AKIA prefix but wrong length", () => { + const text = "AKIA12345 is not valid"; + const result = detectSecrets(text, apiKeyConfig); + expect(result.detected).toBe(false); + }); + + test("avoids false positive - ghp_ prefix but too short", () => { + const text = "ghp_tooshort is not valid"; + const result = detectSecrets(text, apiKeyConfig); + expect(result.detected).toBe(false); + }); +}); + +describe("detectSecrets - JWT Tokens", () => { + const jwtConfig: SecretsDetectionConfig = { + ...defaultConfig, + entities: ["JWT_TOKEN"], + }; + + test("detects JWT token", () => { + const text = `Authorization: ${jwtToken}`; + const result = detectSecrets(text, jwtConfig); + expect(result.detected).toBe(true); + expect(result.matches).toHaveLength(1); + expect(result.matches[0].type).toBe("JWT_TOKEN"); + expect(result.matches[0].count).toBe(1); + }); + + test("detects JWT in JSON context", () => { + const text = `{"token": "${jwtToken}"}`; + const result = detectSecrets(text, jwtConfig); + expect(result.detected).toBe(true); + expect(result.matches[0].type).toBe("JWT_TOKEN"); + }); + + test("detects multiple JWT tokens", () => { + const text = `Access: ${jwtToken}\nRefresh: ${jwtToken}`; + const result = detectSecrets(text, jwtConfig); + expect(result.detected).toBe(true); + expect(result.matches[0].count).toBe(2); 
+ }); + + test("avoids false positive - eyJ but incomplete structure", () => { + const text = "eyJhbGciOiJIUzI1NiJ9 is not complete"; + const result = detectSecrets(text, jwtConfig); + expect(result.detected).toBe(false); + }); + + test("avoids false positive - random text with dots", () => { + const text = "some.random.text is not a JWT"; + const result = detectSecrets(text, jwtConfig); + expect(result.detected).toBe(false); + }); +}); + +describe("detectSecrets - Bearer Tokens", () => { + const bearerConfig: SecretsDetectionConfig = { + ...defaultConfig, + entities: ["BEARER_TOKEN"], + }; + + test("detects Bearer token", () => { + const text = `Authorization: ${bearerToken}`; + const result = detectSecrets(text, bearerConfig); + expect(result.detected).toBe(true); + expect(result.matches).toHaveLength(1); + expect(result.matches[0].type).toBe("BEARER_TOKEN"); + }); + + test("detects bearer token (lowercase)", () => { + const text = "bearer abcdefghijklmnopqrstuvwxyz1234567890"; + const result = detectSecrets(text, bearerConfig); + expect(result.detected).toBe(true); + expect(result.matches[0].type).toBe("BEARER_TOKEN"); + }); + + test("avoids false positive - Bearer with short token", () => { + const text = "Bearer short"; + const result = detectSecrets(text, bearerConfig); + expect(result.detected).toBe(false); + }); +}); + +describe("detectSecrets - Mixed secret types", () => { + const allConfig: SecretsDetectionConfig = { + ...defaultConfig, + entities: [ + "OPENSSH_PRIVATE_KEY", + "PEM_PRIVATE_KEY", + "API_KEY_OPENAI", + "API_KEY_AWS", + "API_KEY_GITHUB", + "JWT_TOKEN", + "BEARER_TOKEN", + ], + }; + + test("detects multiple secret types in same text", () => { + const text = ` +Config file: +API_KEY=${openaiApiKey} +AWS_KEY=${awsAccessKey} +TOKEN=${jwtToken} +${rsaKey} +`; + const result = detectSecrets(text, allConfig); + expect(result.detected).toBe(true); + expect(result.matches.length).toBeGreaterThanOrEqual(4); + }); + + test("redaction positions are 
correct for all types", () => { + const text = `Key: ${awsAccessKey} and ${githubToken}`; + const result = detectSecrets(text, allConfig); + expect(result.redactions).toBeDefined(); + expect(result.redactions?.length).toBe(2); + + // Verify redactions point to correct positions + for (const redaction of result.redactions || []) { + const extracted = text.slice(redaction.start, redaction.end); + expect(extracted.length).toBeGreaterThan(10); + } + }); +}); + describe("extractTextFromRequest", () => { test("extracts text from simple messages", () => { const request: ChatCompletionRequest = { diff --git a/src/secrets/detect.ts b/src/secrets/detect.ts index 7b33402..73fdc8e 100644 --- a/src/secrets/detect.ts +++ b/src/secrets/detect.ts @@ -1,15 +1,27 @@ import type { SecretsDetectionConfig } from "../config"; import type { ChatCompletionRequest } from "../services/llm-client"; +/** + * All supported secret entity types + */ +export type SecretEntityType = + | "OPENSSH_PRIVATE_KEY" + | "PEM_PRIVATE_KEY" + | "API_KEY_OPENAI" + | "API_KEY_AWS" + | "API_KEY_GITHUB" + | "JWT_TOKEN" + | "BEARER_TOKEN"; + export interface SecretsMatch { - type: "OPENSSH_PRIVATE_KEY" | "PEM_PRIVATE_KEY"; + type: SecretEntityType; count: number; } export interface SecretsRedaction { start: number; end: number; - type: string; + type: SecretEntityType; } export interface SecretsDetectionResult { @@ -34,11 +46,48 @@ export function extractTextFromRequest(body: ChatCompletionRequest): string { } /** - * Detects secret material (e.g. 
private keys) in text + * Helper to detect secrets matching a pattern and add to matches/redactions + */ +function detectPattern( + textToScan: string, + pattern: RegExp, + entityType: SecretEntityType, + matches: SecretsMatch[], + redactions: SecretsRedaction[], + existingPositions?: Set<number>, +): number { + let count = 0; + for (const match of textToScan.matchAll(pattern)) { + if (match.index !== undefined) { + // Skip if this position was already matched by another pattern + if (existingPositions?.has(match.index)) continue; + + count++; + existingPositions?.add(match.index); + redactions.push({ + start: match.index, + end: match.index + match[0].length, + type: entityType, + }); + } + } + if (count > 0) { + matches.push({ type: entityType, count }); + } + return count; +} + +/** + * Detects secret material (e.g. private keys, API keys, tokens) in text * * Scans for: * - OpenSSH private keys: -----BEGIN OPENSSH PRIVATE KEY----- * - PEM private keys: RSA, PRIVATE KEY, ENCRYPTED PRIVATE KEY + * - OpenAI API keys: sk-... (48+ chars) + * - AWS access keys: AKIA... (20 chars) + * - GitHub tokens: ghp_, gho_, ghu_, ghs_, ghr_ (40+ chars) + * - JWT tokens: eyJ... (three base64 segments) + * - Bearer tokens: Bearer ... (in Authorization-style contexts) + * * Respects max_scan_chars limit for performance. 
*/ @@ -63,21 +112,7 @@ export function detectSecrets( if (entitiesToDetect.has("OPENSSH_PRIVATE_KEY")) { const opensshPattern = /-----BEGIN OPENSSH PRIVATE KEY-----[\s\S]*?-----END OPENSSH PRIVATE KEY-----/g; - const opensshMatches = textToScan.matchAll(opensshPattern); - let count = 0; - for (const match of opensshMatches) { - count++; - if (match.index !== undefined) { - redactions.push({ - start: match.index, - end: match.index + match[0].length, - type: "OPENSSH_PRIVATE_KEY", - }); - } - } - if (count > 0) { - matches.push({ type: "OPENSSH_PRIVATE_KEY", count }); - } + detectPattern(textToScan, opensshPattern, "OPENSSH_PRIVATE_KEY", matches, redactions); } // PEM private key patterns @@ -87,56 +122,81 @@ export function detectSecrets( // RSA PRIVATE KEY const rsaPattern = /-----BEGIN RSA PRIVATE KEY-----[\s\S]*?-----END RSA PRIVATE KEY-----/g; - let rsaCount = 0; - for (const match of textToScan.matchAll(rsaPattern)) { - rsaCount++; - if (match.index !== undefined) { - matchedPositions.add(match.index); - redactions.push({ - start: match.index, - end: match.index + match[0].length, - type: "PEM_PRIVATE_KEY", - }); - } + detectPattern(textToScan, rsaPattern, "PEM_PRIVATE_KEY", matches, redactions, matchedPositions); + + // Remove PEM_PRIVATE_KEY from matches to accumulate all PEM types together + const pemMatch = matches.find((m) => m.type === "PEM_PRIVATE_KEY"); + if (pemMatch) { + matches.splice(matches.indexOf(pemMatch), 1); } + let totalPemCount = pemMatch?.count || 0; // PRIVATE KEY (generic) - exclude RSA matches const privateKeyPattern = /-----BEGIN PRIVATE KEY-----[\s\S]*?-----END PRIVATE KEY-----/g; - let privateKeyCount = 0; - for (const match of textToScan.matchAll(privateKeyPattern)) { - if (match.index !== undefined && !matchedPositions.has(match.index)) { - privateKeyCount++; - matchedPositions.add(match.index); - redactions.push({ - start: match.index, - end: match.index + match[0].length, - type: "PEM_PRIVATE_KEY", - }); - } - } + const 
tempMatches: SecretsMatch[] = []; + detectPattern( + textToScan, + privateKeyPattern, + "PEM_PRIVATE_KEY", + tempMatches, + redactions, + matchedPositions, + ); + totalPemCount += tempMatches[0]?.count || 0; // ENCRYPTED PRIVATE KEY const encryptedPattern = /-----BEGIN ENCRYPTED PRIVATE KEY-----[\s\S]*?-----END ENCRYPTED PRIVATE KEY-----/g; - let encryptedCount = 0; - for (const match of textToScan.matchAll(encryptedPattern)) { - if (match.index !== undefined && !matchedPositions.has(match.index)) { - encryptedCount++; - matchedPositions.add(match.index); - redactions.push({ - start: match.index, - end: match.index + match[0].length, - type: "PEM_PRIVATE_KEY", - }); - } - } + const tempMatches2: SecretsMatch[] = []; + detectPattern( + textToScan, + encryptedPattern, + "PEM_PRIVATE_KEY", + tempMatches2, + redactions, + matchedPositions, + ); + totalPemCount += tempMatches2[0]?.count || 0; - const totalPemCount = rsaCount + privateKeyCount + encryptedCount; if (totalPemCount > 0) { matches.push({ type: "PEM_PRIVATE_KEY", count: totalPemCount }); } } + // OpenAI API keys: sk-... followed by alphanumeric chars + // Modern format: sk-proj-... or sk-... with 48+ total chars + if (entitiesToDetect.has("API_KEY_OPENAI")) { + // Match sk- followed by optional prefix (proj-, etc.) 
and alphanumeric/dash/underscore + const openaiPattern = /sk-[a-zA-Z0-9_-]{45,}/g; + detectPattern(textToScan, openaiPattern, "API_KEY_OPENAI", matches, redactions); + } + + // AWS access keys: AKIA followed by 16 uppercase alphanumeric chars + if (entitiesToDetect.has("API_KEY_AWS")) { + const awsPattern = /AKIA[0-9A-Z]{16}/g; + detectPattern(textToScan, awsPattern, "API_KEY_AWS", matches, redactions); + } + + // GitHub tokens: ghp_, gho_, ghu_, ghs_, ghr_ followed by 36+ alphanumeric chars + if (entitiesToDetect.has("API_KEY_GITHUB")) { + const githubPattern = /gh[pousr]_[a-zA-Z0-9]{36,}/g; + detectPattern(textToScan, githubPattern, "API_KEY_GITHUB", matches, redactions); + } + + // JWT tokens: three base64url segments separated by dots + // Header starts with eyJ (base64 for {"...) + if (entitiesToDetect.has("JWT_TOKEN")) { + const jwtPattern = /eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]+/g; + detectPattern(textToScan, jwtPattern, "JWT_TOKEN", matches, redactions); + } + + // Bearer tokens in Authorization-style contexts + // Matches "Bearer " followed by a token (at least 20 chars) + if (entitiesToDetect.has("BEARER_TOKEN")) { + const bearerPattern = /Bearer\s+[a-zA-Z0-9._-]{20,}/gi; + detectPattern(textToScan, bearerPattern, "BEARER_TOKEN", matches, redactions); + } + // Sort redactions by start position (descending) for safe replacement redactions.sort((a, b) => b.start - a.start);