From: Stefan Gasser Date: Wed, 21 Jan 2026 06:14:05 +0000 (+0100) Subject: Add generic /api/mask endpoint for standalone text masking X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=2616333de57733fadece292ae43e1f174b298151;p=sgasser-llm-shield.git Add generic /api/mask endpoint for standalone text masking Adds a new POST /api/mask endpoint that can be used by browser extensions, CLIs, or any client that needs to mask PII and secrets independently of the OpenAI/Anthropic proxy routes. Features: - Detects and masks both PII and secrets (configurable via detect param) - Returns context mapping for client-side unmasking - Supports multi-turn conversations via startFrom counters - Auto-detects language or accepts explicit language parameter - Logs requests to dashboard for visibility - Rejects whitespace-only text input - Consistent error handling for both PII and secrets detection --- diff --git a/docs/api-reference/mask.mdx b/docs/api-reference/mask.mdx new file mode 100644 index 0000000..7cd593c --- /dev/null +++ b/docs/api-reference/mask.mdx @@ -0,0 +1,203 @@ +--- +title: Mask API +description: POST /api/mask +--- + +# Mask API + +Standalone endpoint for masking PII and secrets in text. Use this endpoint when you need to mask text independently of LLM provider proxies. 
+ +``` +POST /api/mask +``` + +## Use Cases + +- Browser extensions that need to mask clipboard content before pasting +- CLI tools that pre-process prompts +- Custom integrations that handle their own LLM communication +- Multi-turn conversations where you need to maintain placeholder numbering + +## Request + +```bash +curl -X POST http://localhost:3000/api/mask \ + -H "Content-Type: application/json" \ + -d '{ + "text": "Contact john@example.com or call 555-1234" + }' +``` + +## Parameters + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `text` | string | Yes | Text to scan and mask | +| `language` | string | No | Language code for PII detection (auto-detected if omitted or not one of the configured languages) | +| `startFrom` | object | No | Counter values to continue numbering from previous calls | +| `detect` | array | No | What to detect: `["pii"]`, `["secrets"]`, or `["pii", "secrets"]` (default: both) | + +## Response + +```json +{ + "masked": "Contact [[EMAIL_ADDRESS_1]] or call [[PHONE_NUMBER_1]]", + "context": { + "[[EMAIL_ADDRESS_1]]": "john@example.com", + "[[PHONE_NUMBER_1]]": "555-1234" + }, + "counters": { + "EMAIL_ADDRESS": 1, + "PHONE_NUMBER": 1 + }, + "entities": [ + { "type": "EMAIL_ADDRESS", "placeholder": "[[EMAIL_ADDRESS_1]]" }, + { "type": "PHONE_NUMBER", "placeholder": "[[PHONE_NUMBER_1]]" } + ], + "language": "en" +} +``` + +| Field | Description | +|-------|-------------| +| `masked` | Text with PII/secrets replaced by placeholders | +| `context` | Mapping of placeholder to original value (for client-side unmasking) | +| `counters` | Final counter values per entity type | +| `entities` | List of detected entities with their placeholders | +| `language` | Language used for PII detection | + +## Detection Options + +Control what gets detected using the `detect` parameter: + +<CodeGroup> + +```bash PII Only +curl -X POST http://localhost:3000/api/mask \ + -H "Content-Type: application/json" \ + -d '{ + "text": "Email john@example.com", 
+ "detect": ["pii"] + }' +``` + +```bash Secrets Only +curl -X POST http://localhost:3000/api/mask \ + -H "Content-Type: application/json" \ + -d '{ + "text": "-----BEGIN RSA PRIVATE KEY-----\nMIIE...", + "detect": ["secrets"] + }' +``` + +```bash Both (Default) +curl -X POST http://localhost:3000/api/mask \ + -H "Content-Type: application/json" \ + -d '{ + "text": "Contact john@example.com with key -----BEGIN RSA PRIVATE KEY-----..." + }' +``` + +</CodeGroup> + +## Multi-Turn Support + +For conversations spanning multiple requests, use the `counters` from the response as `startFrom` in the next request to maintain consistent numbering: + +**First request:** + +```bash +curl -X POST http://localhost:3000/api/mask \ + -H "Content-Type: application/json" \ + -d '{ + "text": "Contact john@example.com" + }' +``` + +Response: + +```json +{ + "masked": "Contact [[EMAIL_ADDRESS_1]]", + "counters": { "EMAIL_ADDRESS": 1 }, + ... +} +``` + +**Second request (continue numbering):** + +```bash +curl -X POST http://localhost:3000/api/mask \ + -H "Content-Type: application/json" \ + -d '{ + "text": "Also reach out to jane@example.com", + "startFrom": { "EMAIL_ADDRESS": 1 } + }' +``` + +Response: + +```json +{ + "masked": "Also reach out to [[EMAIL_ADDRESS_2]]", + "counters": { "EMAIL_ADDRESS": 2 }, + ... 
+} +``` + +## Client-Side Unmasking + +The `context` field provides a mapping for unmasking responses on the client side: + +```javascript +function unmask(text, context) { + let result = text; + for (const [placeholder, original] of Object.entries(context)) { + result = result.replaceAll(placeholder, original); + } + return result; +} + +// Usage +const response = await fetch('/api/mask', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ text: 'Email john@example.com' }) +}); +const { masked, context } = await response.json(); + +// Later, unmask an LLM response +const llmResponse = "I'll contact [[EMAIL_ADDRESS_1]] right away."; +const unmasked = unmask(llmResponse, context); +// "I'll contact john@example.com right away." +``` + +## Error Responses + +### Validation Error (400) + +```json +{ + "error": { + "message": "Invalid request", + "type": "validation_error", + "details": [ + { "path": "text", "message": "text is required" } + ] + } +} +``` + +### PII Detection Error (503) + +Returned when Presidio is unavailable: + +```json +{ + "error": { + "message": "PII detection failed", + "type": "detection_error", + "details": "Failed to connect to Presidio..." 
+ } +} +``` diff --git a/docs/mint.json b/docs/mint.json index b34545e..395cd86 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -50,6 +50,7 @@ "pages": [ "api-reference/openai", "api-reference/anthropic", + "api-reference/mask", "api-reference/status", "api-reference/dashboard-api" ] diff --git a/src/index.ts b/src/index.ts index 0e85219..33966d9 100644 --- a/src/index.ts +++ b/src/index.ts @@ -6,6 +6,7 @@ import { logger } from "hono/logger"; import { getConfig } from "./config"; import { getPIIDetector } from "./pii/detect"; import { anthropicRoutes } from "./routes/anthropic"; +import { apiRoutes } from "./routes/api"; import { dashboardRoutes } from "./routes/dashboard"; import { healthRoutes } from "./routes/health"; import { infoRoutes } from "./routes/info"; @@ -45,6 +46,7 @@ app.route("/", healthRoutes); app.route("/", infoRoutes); app.route("/openai", openaiRoutes); app.route("/anthropic", anthropicRoutes); +app.route("/api", apiRoutes); if (config.dashboard.enabled) { app.route("/dashboard", dashboardRoutes); @@ -181,6 +183,7 @@ Provider: Server: http://${host}:${port} OpenAI API: http://${host}:${port}/openai/v1/chat/completions Anthropic: http://${host}:${port}/anthropic/v1/messages +Mask API: http://${host}:${port}/api/mask Health: http://${host}:${port}/health Info: http://${host}:${port}/info Dashboard: http://${host}:${port}/dashboard diff --git a/src/routes/api.test.ts b/src/routes/api.test.ts new file mode 100644 index 0000000..0ad1117 --- /dev/null +++ b/src/routes/api.test.ts @@ -0,0 +1,191 @@ +import { describe, expect, mock, test } from "bun:test"; +import { Hono } from "hono"; +import type { PIIEntity } from "../pii/detect"; + +// Mock the PII detector to avoid needing Presidio running +const mockDetectPII = mock<(text: string, language: string) => Promise<PIIEntity[]>>(() => + Promise.resolve([]), +); +mock.module("../pii/detect", () => ({ + getPIIDetector: () => ({ + detectPII: mockDetectPII, + }), +})); + +// Mock the logger to avoid database 
operations +mock.module("../services/logger", () => ({ + logRequest: mock(() => {}), +})); + +// Import after mocks are set up +const { apiRoutes } = await import("./api"); + +const app = new Hono(); +app.route("/api", apiRoutes); + +describe("POST /api/mask", () => { + test("returns 400 for missing text", async () => { + const res = await app.request("/api/mask", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({}), + }); + + expect(res.status).toBe(400); + const body = (await res.json()) as { error: { type: string } }; + expect(body.error.type).toBe("validation_error"); + }); + + test("returns 400 for empty text", async () => { + const res = await app.request("/api/mask", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ text: "" }), + }); + + expect(res.status).toBe(400); + const body = (await res.json()) as { error: { type: string } }; + expect(body.error.type).toBe("validation_error"); + }); + + test("returns 400 for whitespace-only text", async () => { + const res = await app.request("/api/mask", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ text: " " }), + }); + + expect(res.status).toBe(400); + const body = (await res.json()) as { error: { type: string } }; + expect(body.error.type).toBe("validation_error"); + }); + + test("returns masked text with no PII detected", async () => { + mockDetectPII.mockResolvedValueOnce([]); + + const res = await app.request("/api/mask", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ text: "Hello world" }), + }); + + expect(res.status).toBe(200); + const body = (await res.json()) as { + masked: string; + context: Record<string, string>; + entities: unknown[]; + language: string; + }; + expect(body.masked).toBe("Hello world"); + expect(body.context).toEqual({}); + expect(body.entities).toEqual([]); + expect(body.language).toBeDefined(); + }); + + 
test("masks PII entities", async () => { + mockDetectPII.mockResolvedValueOnce([ + { entity_type: "EMAIL_ADDRESS", start: 6, end: 22, score: 0.9 }, + ]); + + const res = await app.request("/api/mask", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ text: "Email john@example.com here" }), + }); + + expect(res.status).toBe(200); + const body = (await res.json()) as { + masked: string; + context: Record<string, string>; + counters: Record<string, number>; + entities: { type: string; placeholder: string }[]; + }; + expect(body.masked).toBe("Email [[EMAIL_ADDRESS_1]] here"); + expect(body.context["[[EMAIL_ADDRESS_1]]"]).toBe("john@example.com"); + expect(body.counters.EMAIL_ADDRESS).toBe(1); + expect(body.entities).toHaveLength(1); + expect(body.entities[0].type).toBe("EMAIL_ADDRESS"); + }); + + test("respects startFrom counters", async () => { + mockDetectPII.mockResolvedValueOnce([ + { entity_type: "EMAIL_ADDRESS", start: 0, end: 16, score: 0.9 }, + ]); + + const res = await app.request("/api/mask", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + text: "jane@example.com", + startFrom: { EMAIL_ADDRESS: 5 }, + }), + }); + + expect(res.status).toBe(200); + const body = (await res.json()) as { + masked: string; + counters: Record<string, number>; + }; + expect(body.masked).toBe("[[EMAIL_ADDRESS_6]]"); + expect(body.counters.EMAIL_ADDRESS).toBe(6); + }); + + test("respects detect parameter for PII only", async () => { + mockDetectPII.mockClear(); + mockDetectPII.mockResolvedValueOnce([]); + + const res = await app.request("/api/mask", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + text: "Test text", + detect: ["pii"], + }), + }); + + expect(res.status).toBe(200); + expect(mockDetectPII).toHaveBeenCalledTimes(1); + }); + + test("masks secrets when detected", async () => { + // detect: ["secrets"] must skip PII detection; forget calls made by earlier tests + mockDetectPII.mockClear(); + const res = await app.request("/api/mask", { + method: "POST", + headers: { "Content-Type": "application/json" }, 
+ body: JSON.stringify({ + text: "-----BEGIN RSA PRIVATE KEY-----\nMIIE...\n-----END RSA PRIVATE KEY-----", + detect: ["secrets"], + }), + }); + + expect(res.status).toBe(200); + expect(mockDetectPII).not.toHaveBeenCalled(); + const body = (await res.json()) as { + masked: string; + entities: { type: string }[]; + }; + expect(body.masked).toContain("[[SECRET_MASKED_PEM_PRIVATE_KEY_1]]"); + expect(body.entities.some((e) => e.type === "PEM_PRIVATE_KEY")).toBe(true); + }); + + test("returns counters for multi-turn support", async () => { + mockDetectPII.mockResolvedValueOnce([ + { entity_type: "PERSON", start: 0, end: 4, score: 0.9 }, + { entity_type: "EMAIL_ADDRESS", start: 5, end: 21, score: 0.9 }, + ]); + + const res = await app.request("/api/mask", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ text: "John john@example.com" }), + }); + + expect(res.status).toBe(200); + const body = (await res.json()) as { + counters: Record<string, number>; + }; + expect(body.counters.PERSON).toBe(1); + expect(body.counters.EMAIL_ADDRESS).toBe(1); + }); +}); diff --git a/src/routes/api.ts b/src/routes/api.ts new file mode 100644 index 0000000..e544b66 --- /dev/null +++ b/src/routes/api.ts @@ -0,0 +1,344 @@ +/** + * Generic masking API routes + * + * Provides standalone masking endpoints for clients that need to mask text + * independently of the OpenAI/Anthropic proxy routes. 
+ */ + +import { Hono } from "hono"; +import { z } from "zod"; +import { getConfig, type SecretsDetectionConfig } from "../config"; +import { resolveConflicts, resolveOverlaps } from "../masking/conflict-resolver"; +import { + createPlaceholderContext, + incrementAndGenerate, + type PlaceholderContext, + replaceWithPlaceholders, +} from "../masking/context"; +import { + generatePlaceholder as generatePlaceholderFromFormat, + generateSecretPlaceholder, + PII_PLACEHOLDER_FORMAT, +} from "../masking/placeholders"; +import type { PIIEntity } from "../pii/detect"; +import { getPIIDetector } from "../pii/detect"; +import { detectSecrets, type SecretLocation } from "../secrets/detect"; +import { getLanguageDetector, type SupportedLanguage } from "../services/language-detector"; +import { logRequest } from "../services/logger"; + +export const apiRoutes = new Hono(); + +// Request schema: blank text is rejected without altering the text itself; startFrom counters must be whole, non-negative numbers +const MaskRequestSchema = z.object({ + text: z.string().refine((value) => value.trim().length > 0, "text is required"), + language: z.string().optional(), + startFrom: z.record(z.string(), z.number().int().nonnegative()).optional(), + detect: z.array(z.enum(["pii", "secrets"])).optional(), +}); + +type MaskRequest = z.infer<typeof MaskRequestSchema>; + +// Response types +interface MaskEntity { + type: string; + placeholder: string; +} + +interface MaskResponse { + masked: string; + context: Record<string, string>; + counters: Record<string, number>; + entities: MaskEntity[]; + language: string; +} + +/** + * Generates the next PII placeholder for entityType via incrementAndGenerate and PII_PLACEHOLDER_FORMAT + */ +function generatePIIPlaceholder(entityType: string, context: PlaceholderContext): string { + return incrementAndGenerate(entityType, context, (type, count) => + generatePlaceholderFromFormat(PII_PLACEHOLDER_FORMAT, type, count), + ); +} + +/** + * Generates the next secret placeholder for secretType via incrementAndGenerate and generateSecretPlaceholder + */ +function generateSecretsPlaceholder(secretType: string, context: PlaceholderContext): string { + return incrementAndGenerate(secretType, context, generateSecretPlaceholder); +} + +/** + * Replaces the given PII entities in text with placeholders; returns the masked text plus one MaskEntity per generated placeholder + */ +function maskWithPII( + text: string, + entities: 
PIIEntity[], + context: PlaceholderContext, +): { masked: string; entities: MaskEntity[] } { + if (entities.length === 0) { + return { masked: text, entities: [] }; + } + + const maskEntities: MaskEntity[] = []; + + const masked = replaceWithPlaceholders( + text, + entities, + context, + (e) => e.entity_type, + (type, ctx) => { + const placeholder = generatePIIPlaceholder(type, ctx); + maskEntities.push({ type, placeholder }); + return placeholder; + }, + resolveConflicts, + ); + + return { masked, entities: maskEntities }; +} + +/** + * Replaces the given secret locations in text with placeholders; returns the masked text plus one MaskEntity per generated placeholder + */ +function maskWithSecrets( + text: string, + locations: SecretLocation[], + context: PlaceholderContext, +): { masked: string; entities: MaskEntity[] } { + if (locations.length === 0) { + return { masked: text, entities: [] }; + } + + const maskEntities: MaskEntity[] = []; + + const masked = replaceWithPlaceholders( + text, + locations, + context, + (loc) => loc.type, + (type, ctx) => { + const placeholder = generateSecretsPlaceholder(type, ctx); + maskEntities.push({ type, placeholder }); + return placeholder; + }, + resolveOverlaps, + ); + + return { masked, entities: maskEntities }; +} + +/** + * POST /api/mask + * + * Masks PII and secrets in text. Returns context for client-side unmasking. 
+ */ +apiRoutes.post("/mask", async (c) => { + const startTime = Date.now(); + const config = getConfig(); + const userAgent = c.req.header("user-agent") || null; + + // Parse and validate request (a malformed JSON body becomes null and fails validation with 400) + const body = await c.req.json().catch(() => null); + const parseResult = MaskRequestSchema.safeParse(body); + + if (!parseResult.success) { + return c.json( + { + error: { + message: "Invalid request", + type: "validation_error", + details: parseResult.error.issues.map((e) => ({ + path: e.path.join("."), + message: e.message, + })), + }, + }, + 400, + ); + } + + const request: MaskRequest = parseResult.data; + const detectTypes = request.detect ?? ["pii", "secrets"]; + const detectPII = detectTypes.includes("pii"); + const detectSecretsFlag = detectTypes.includes("secrets"); + + // Initialize context with optional startFrom counters + const context = createPlaceholderContext(); + if (request.startFrom) { + for (const [type, count] of Object.entries(request.startFrom)) { + context.counters[type] = count; + } + } + + // Detect language (use the provided language only if it is configured; otherwise auto-detect) + let language: SupportedLanguage; + let languageFallback = false; + if ( + request.language && + config.pii_detection.languages.includes(request.language as SupportedLanguage) + ) { + language = request.language as SupportedLanguage; + } else { + const langResult = getLanguageDetector().detect(request.text); + language = langResult.language; + languageFallback = langResult.usedFallback; + } + + let maskedText = request.text; + const allEntities: MaskEntity[] = []; + const piiEntityTypes: string[] = []; + const secretTypes: string[] = []; + let scanTimeMs = 0; + + // Detect and mask PII + if (detectPII) { + try { + const piiStartTime = Date.now(); + const detector = getPIIDetector(); + const piiEntities = await detector.detectPII(maskedText, language); + scanTimeMs = Date.now() - piiStartTime; + + // Apply whitelist filtering + const whitelist = config.masking.whitelist; + const filteredEntities = 
piiEntities.filter((entity) => { + const detectedText = maskedText.slice(entity.start, entity.end); + return !whitelist.some( + (pattern) => pattern.includes(detectedText) || detectedText.includes(pattern), + ); + }); + + const piiResult = maskWithPII(maskedText, filteredEntities, context); + maskedText = piiResult.masked; + allEntities.push(...piiResult.entities); + + // Collect unique entity types for logging + for (const entity of filteredEntities) { + if (!piiEntityTypes.includes(entity.entity_type)) { + piiEntityTypes.push(entity.entity_type); + } + } + } catch (error) { + // Log the failed request before returning 503 + logRequest( + { + timestamp: new Date().toISOString(), + mode: "mask", + provider: "api", + model: "mask", + piiDetected: false, + entities: [], + latencyMs: Date.now() - startTime, + scanTimeMs: 0, + language, + languageFallback, + statusCode: 503, + errorMessage: error instanceof Error ? error.message : "PII detection failed", + }, + userAgent, + ); + + return c.json( + { + error: { + message: "PII detection failed", + type: "detection_error", + details: error instanceof Error ? 
error.message : "Unknown error", + }, + }, + 503, + ); + } + } + + // Detect and mask secrets (runs on the PII-masked text; skipped when secrets detection is disabled in config) + if (detectSecretsFlag && config.secrets_detection.enabled) { + try { + // Create a config for detection (always use mask action for API) + const secretsConfig: SecretsDetectionConfig = { + enabled: true, + action: "mask", + entities: config.secrets_detection.entities, + max_scan_chars: config.secrets_detection.max_scan_chars, + log_detected_types: false, + }; + + const secretsResult = detectSecrets(maskedText, secretsConfig); + + if (secretsResult.locations && secretsResult.locations.length > 0) { + const secretsMaskResult = maskWithSecrets(maskedText, secretsResult.locations, context); + maskedText = secretsMaskResult.masked; + allEntities.push(...secretsMaskResult.entities); + + // Collect unique secret types for logging + for (const match of secretsResult.matches) { + if (!secretTypes.includes(match.type)) { + secretTypes.push(match.type); + } + } + } + } catch (error) { + // Log the failed request before returning 503 + logRequest( + { + timestamp: new Date().toISOString(), + mode: "mask", + provider: "api", + model: "mask", + piiDetected: piiEntityTypes.length > 0, + entities: piiEntityTypes, + latencyMs: Date.now() - startTime, + scanTimeMs, + language, + languageFallback, + statusCode: 503, + errorMessage: error instanceof Error ? error.message : "Secrets detection failed", + }, + userAgent, + ); + + return c.json( + { + error: { + message: "Secrets detection failed", + type: "detection_error", + details: error instanceof Error ? error.message : "Unknown error", + }, + }, + 503, + ); + } + } + + // Log successful request + logRequest( + { + timestamp: new Date().toISOString(), + mode: "mask", + provider: "api", + model: "mask", + piiDetected: piiEntityTypes.length > 0, + entities: piiEntityTypes, + latencyMs: Date.now() - startTime, + scanTimeMs, + language, + languageFallback, + maskedContent: config.logging.log_masked_content ? 
maskedText : undefined, + secretsDetected: secretTypes.length > 0, + secretsTypes: secretTypes.length > 0 ? secretTypes : undefined, + statusCode: 200, + }, + userAgent, + ); + + // Build response (counters are copied so the returned object is detached from the working context) + const response: MaskResponse = { + masked: maskedText, + context: context.mapping, + counters: { ...context.counters }, + entities: allEntities, + language, + }; + + return c.json(response); +}); diff --git a/src/routes/utils.ts b/src/routes/utils.ts index 54b0cc5..44732bf 100644 --- a/src/routes/utils.ts +++ b/src/routes/utils.ts @@ -207,7 +207,7 @@ export function toSecretsHeaderData( } export interface CreateLogDataOptions { - provider: "openai" | "anthropic" | "local"; + provider: "openai" | "anthropic" | "local" | "api"; model: string; startTime: number; pii?: PIILogData; diff --git a/src/services/logger.ts b/src/services/logger.ts index 93a0deb..4637a03 100644 --- a/src/services/logger.ts +++ b/src/services/logger.ts @@ -6,7 +6,7 @@ export interface RequestLog { id?: number; timestamp: string; mode: "route" | "mask"; - provider: "openai" | "anthropic" | "local"; + provider: "openai" | "anthropic" | "local" | "api"; model: string; pii_detected: boolean; entities: string; @@ -282,7 +282,7 @@ export function getLogger(): Logger { export interface RequestLogData { timestamp: string; mode: "route" | "mask"; - provider: "openai" | "anthropic" | "local"; + provider: "openai" | "anthropic" | "local" | "api"; model: string; piiDetected: boolean; entities: string[];