From: Stefan Gasser Date: Mon, 26 Jan 2026 07:13:59 +0000 (+0100) Subject: Fix entity extraction and improve API consistency X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=93cedf6191c599785b30e0e8e712995a582bbe98;p=sgasser-llm-shield.git Fix entity extraction and improve API consistency - Use direct placeholder lookup instead of fragile substring matching - Make error details always an array for consistent response format - Add languageFallback field to response - Add tests for error paths and edge cases --- diff --git a/docs/api-reference/mask.mdx b/docs/api-reference/mask.mdx index 7cd593c..82f81c3 100644 --- a/docs/api-reference/mask.mdx +++ b/docs/api-reference/mask.mdx @@ -54,7 +54,8 @@ curl -X POST http://localhost:3000/api/mask \ { "type": "EMAIL_ADDRESS", "placeholder": "[[EMAIL_ADDRESS_1]]" }, { "type": "PHONE_NUMBER", "placeholder": "[[PHONE_NUMBER_1]]" } ], - "language": "en" + "language": "en", + "languageFallback": false } ``` @@ -65,6 +66,7 @@ curl -X POST http://localhost:3000/api/mask \ | `counters` | Final counter values per entity type | | `entities` | List of detected entities with their placeholders | | `language` | Language used for PII detection | +| `languageFallback` | Whether the configured fallback language was used (auto-detection failed) | ## Detection Options @@ -188,16 +190,18 @@ const unmasked = unmask(llmResponse, context); } ``` -### PII Detection Error (503) +### Detection Error (503) -Returned when Presidio is unavailable: +Returned when Presidio or secrets detection is unavailable: ```json { "error": { "message": "PII detection failed", "type": "detection_error", - "details": "Failed to connect to Presidio..." + "details": [ + { "message": "Failed to connect to Presidio..." 
} + ] } } ``` diff --git a/src/routes/api.test.ts b/src/routes/api.test.ts index 6d6d7d0..589e241 100644 --- a/src/routes/api.test.ts +++ b/src/routes/api.test.ts @@ -219,4 +219,97 @@ describe("POST /api/mask", () => { expect(body.entities.some((e) => e.type === "EMAIL_ADDRESS")).toBe(true); expect(body.entities.some((e) => e.type === "PEM_PRIVATE_KEY")).toBe(true); }); + + test("returns 400 for malformed JSON", async () => { + const res = await app.request("/api/mask", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: "not valid json", + }); + + expect(res.status).toBe(400); + const body = (await res.json()) as { error: { type: string } }; + expect(body.error.type).toBe("validation_error"); + }); + + test("returns 503 when PII detection fails", async () => { + mockDetectPII.mockRejectedValueOnce(new Error("Presidio connection failed")); + + const res = await app.request("/api/mask", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ text: "Contact john@example.com" }), + }); + + expect(res.status).toBe(503); + const body = (await res.json()) as { + error: { type: string; message: string; details: { message: string }[] }; + }; + expect(body.error.type).toBe("detection_error"); + expect(body.error.message).toBe("PII detection failed"); + expect(body.error.details[0].message).toBe("Presidio connection failed"); + }); + + test("includes languageFallback in response", async () => { + mockDetectPII.mockResolvedValueOnce([]); + + const res = await app.request("/api/mask", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ text: "Hello world" }), + }); + + expect(res.status).toBe(200); + const body = (await res.json()) as { + languageFallback: boolean; + }; + expect(typeof body.languageFallback).toBe("boolean"); + }); + + test("respects multiple entity types in startFrom", async () => { + mockDetectPII.mockResolvedValueOnce([ + { entity_type: "PERSON", 
start: 0, end: 4, score: 0.9 }, + { entity_type: "EMAIL_ADDRESS", start: 5, end: 21, score: 0.9 }, + ]); + + const res = await app.request("/api/mask", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + text: "John john@example.com", + startFrom: { PERSON: 3, EMAIL_ADDRESS: 7 }, + }), + }); + + expect(res.status).toBe(200); + const body = (await res.json()) as { + masked: string; + counters: Record<string, number>; + }; + expect(body.masked).toContain("[[PERSON_4]]"); + expect(body.masked).toContain("[[EMAIL_ADDRESS_8]]"); + expect(body.counters.PERSON).toBe(4); + expect(body.counters.EMAIL_ADDRESS).toBe(8); + }); + + test("skips both detections when detect is empty array", async () => { + const res = await app.request("/api/mask", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + text: "john@example.com -----BEGIN RSA PRIVATE KEY-----\nMIIE...\n-----END RSA PRIVATE KEY-----", + detect: [], + }), + }); + + expect(res.status).toBe(200); + const body = (await res.json()) as { + masked: string; + entities: unknown[]; + }; + // Nothing should be masked when detect is empty + expect(body.masked).toContain("john@example.com"); + expect(body.masked).toContain("-----BEGIN RSA PRIVATE KEY-----"); + expect(body.entities).toHaveLength(0); + }); }); diff --git a/src/routes/api.ts b/src/routes/api.ts index 46b382d..fe4205d 100644 --- a/src/routes/api.ts +++ b/src/routes/api.ts @@ -41,6 +41,7 @@ interface MaskResponse { counters: Record<string, number>; entities: MaskEntity[]; language: string; + languageFallback: boolean; } /** @@ -49,6 +50,7 @@ interface MaskResponse { function extractEntities( countersBefore: Record<string, number>, context: PlaceholderContext, + isSecret: boolean, ): MaskEntity[] { const entities: MaskEntity[] = []; @@ -56,9 +58,10 @@ function extractEntities( const startCount = countersBefore[type] || 0; // Add entities for each new placeholder created for (let i = startCount + 1; i <= count; i++) { - // Find 
the placeholder in the mapping - const placeholder = Object.keys(context.mapping).find((p) => p.includes(`${type}_${i}]`)); - if (placeholder) { + // Build placeholder directly using known format + const placeholder = isSecret ? `[[SECRET_MASKED_${type}_${i}]]` : `[[${type}_${i}]]`; + + if (context.mapping[placeholder]) { entities.push({ type, placeholder }); } } @@ -149,7 +152,7 @@ apiRoutes.post("/mask", async (c) => { const countersBefore = { ...context.counters }; const piiResult = maskPII(maskedText, filteredEntities, context); maskedText = piiResult.masked; - allEntities.push(...extractEntities(countersBefore, piiResult.context)); + allEntities.push(...extractEntities(countersBefore, piiResult.context, false)); // Collect unique entity types for logging for (const entity of filteredEntities) { @@ -176,7 +179,7 @@ apiRoutes.post("/mask", async (c) => { error: { message: "PII detection failed", type: "detection_error", - details: error instanceof Error ? error.message : "Unknown error", + details: [{ message: error instanceof Error ? error.message : "Unknown error" }], }, }, 503, @@ -203,7 +206,7 @@ apiRoutes.post("/mask", async (c) => { const countersBefore = { ...context.counters }; const secretsMaskResult = maskSecrets(maskedText, secretsResult.locations, context); maskedText = secretsMaskResult.masked; - allEntities.push(...extractEntities(countersBefore, secretsMaskResult.context)); + allEntities.push(...extractEntities(countersBefore, secretsMaskResult.context, true)); // Collect unique secret types for logging for (const match of secretsResult.matches) { @@ -237,7 +240,7 @@ apiRoutes.post("/mask", async (c) => { error: { message: "Secrets detection failed", type: "detection_error", - details: error instanceof Error ? error.message : "Unknown error", + details: [{ message: error instanceof Error ? 
error.message : "Unknown error" }], }, }, 503, @@ -273,6 +276,7 @@ apiRoutes.post("/mask", async (c) => { counters: { ...context.counters }, entities: allEntities, language, + languageFallback, }; return c.json(response);