From: raseidl Date: Fri, 27 Feb 2026 18:55:36 +0000 (+0100) Subject: fix: preserve unknown fields in Anthropic schemas (restores prompt caching) (#74) X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=9e8006a35836bad9ae20a8f5749d57df9d9f84a9;p=sgasser-llm-shield.git fix: preserve unknown fields in Anthropic schemas (restores prompt caching) (#74) * fix: preserve cache_control in Anthropic schemas to restore prompt caching * fix: add biome-ignore for passthrough tests, extend fix to OpenAI - Add biome-ignore comments to suppress noExplicitAny in tests (required for testing unknown field preservation) - Add .passthrough() to OpenAI schemas for consistency (OpenAIMessageSchema, OpenAIContentPartSchema) - Format Anthropic schemas to match project style * test: add OpenAI passthrough tests for consistency with Anthropic - Schema tests for name, tool_calls, audio content, unknown fields - Extractor tests for field preservation through applyMasked --------- Co-authored-by: Stefan Gasser --- diff --git a/src/masking/extractors/anthropic.test.ts b/src/masking/extractors/anthropic.test.ts index bf16c69..cb49906 100644 --- a/src/masking/extractors/anthropic.test.ts +++ b/src/masking/extractors/anthropic.test.ts @@ -737,4 +737,125 @@ describe("Anthropic Text Extractor", () => { expect((result.content[0] as { text: string }).text).toBe("No placeholders here"); }); }); + + describe("cache_control preservation", () => { + test("preserves cache_control on text block through applyMasked", () => { + const request = createRequest([ + { + role: "user", + content: [ + { + type: "text", + text: "Contact john@example.com", + cache_control: { type: "ephemeral" }, + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + } as any, + ], + }, + ]); + + const maskedSpans = [ + { + path: "messages[0].content[0].text", + maskedText: "Contact [[EMAIL_ADDRESS_1]]", + messageIndex: 0, + partIndex: 0, + }, + ]; + + const result = anthropicExtractor.applyMasked(request, maskedSpans); + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + const block = (result.messages[0].content as any[])[0]; + + expect(block.text).toBe("Contact [[EMAIL_ADDRESS_1]]"); + expect(block.cache_control).toEqual({ type: "ephemeral" }); + }); + + test("preserves cache_control on system prompt block through applyMasked", () => { + const request = createRequest( + [{ role: "user", content: "Hello" }], + [ + { + type: "text", + text: "You are an assistant. User is John Doe.", + cache_control: { type: "ephemeral" }, + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + } as any, + ], + ); + + const maskedSpans = [ + { + path: "system[0].text", + maskedText: "You are an assistant. User is [[PERSON_1]].", + messageIndex: -1, + partIndex: 0, + }, + ]; + + const result = anthropicExtractor.applyMasked(request, maskedSpans); + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + const block = (result.system as any[])[0]; + + expect(block.text).toBe("You are an assistant. User is [[PERSON_1]]."); + expect(block.cache_control).toEqual({ type: "ephemeral" }); + }); + + test("preserves unknown fields on message through applyMasked", () => { + const request = createRequest([ + { + role: "user", + content: "Hello", + extra_field: "preserved", + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + } as any, + ]); + + const maskedSpans = [ + { + path: "messages[0].content", + maskedText: "Hello", + messageIndex: 0, + partIndex: 0, + }, + ]; + + const result = anthropicExtractor.applyMasked(request, maskedSpans); + + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + expect((result.messages[0] as any).extra_field).toBe("preserved"); + }); + + test("preserves cache_control when no masking is applied", () => { + const request = createRequest([ + { + role: "user", + content: [ + { + type: "text", + text: "No PII here", + cache_control: { type: "ephemeral" }, + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + } as any, + ], + }, + ]); + + // applyMasked with no-op span (text unchanged) + const maskedSpans = [ + { + path: "messages[0].content[0].text", + maskedText: "No PII here", + messageIndex: 0, + partIndex: 0, + }, + ]; + + const result = anthropicExtractor.applyMasked(request, maskedSpans); + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + const block = (result.messages[0].content as any[])[0]; + + expect(block.cache_control).toEqual({ type: "ephemeral" }); + }); + }); }); diff --git a/src/masking/extractors/openai.test.ts b/src/masking/extractors/openai.test.ts index 9ae4eed..e1078c6 100644 --- a/src/masking/extractors/openai.test.ts +++ b/src/masking/extractors/openai.test.ts @@ -299,4 +299,98 @@ describe("OpenAI Text Extractor", () => { expect(result.choices[0].message.content).toBeNull(); }); }); + + describe("unknown field preservation", () => { + test("preserves name field on message through applyMasked", () => { + const request = createRequest([ + { + role: "user", + content: "Contact john@example.com", + name: "test_user", + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + } as any, + ]); + + const maskedSpans = [ + { + path: "messages[0].content", + maskedText: "Contact [[EMAIL_ADDRESS_1]]", + messageIndex: 0, + partIndex: 0, + }, + ]; + + const result = openaiExtractor.applyMasked(request, maskedSpans); + + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + expect((result.messages[0] as any).name).toBe("test_user"); + expect(result.messages[0].content).toBe("Contact [[EMAIL_ADDRESS_1]]"); + }); + + test("preserves tool_calls on assistant message through applyMasked", () => { + const request = createRequest([ + { role: "user", content: "What is the weather?" }, + { + role: "assistant", + content: null, + tool_calls: [ + { + id: "call_123", + type: "function", + function: { name: "get_weather", arguments: "{}" }, + }, + ], + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + } as any, + ]); + + const maskedSpans = [ + { + path: "messages[0].content", + maskedText: "What is the weather?", + messageIndex: 0, + partIndex: 0, + }, + ]; + + const result = openaiExtractor.applyMasked(request, maskedSpans); + + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + expect((result.messages[1] as any).tool_calls).toHaveLength(1); + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + expect((result.messages[1] as any).tool_calls[0].id).toBe("call_123"); + }); + + test("preserves unknown fields on content part through applyMasked", () => { + const request = createRequest([ + { + role: "user", + content: [ + { + type: "text", + text: "Hello John Doe", + custom_field: "preserved", + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + } as any, + ], + }, + ]); + + const maskedSpans = [ + { + path: "messages[0].content[0].text", + maskedText: "Hello [[PERSON_1]]", + messageIndex: 0, + partIndex: 0, + }, + ]; + + const result = openaiExtractor.applyMasked(request, maskedSpans); + + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + const part = (result.messages[0].content as any[])[0]; + expect(part.text).toBe("Hello [[PERSON_1]]"); + expect(part.custom_field).toBe("preserved"); + }); + }); }); diff --git a/src/providers/anthropic/types.ts b/src/providers/anthropic/types.ts index 08956d7..69f6d4f 100644 --- a/src/providers/anthropic/types.ts +++ b/src/providers/anthropic/types.ts @@ -6,47 +6,64 @@ import { z } from "zod"; // Content block types -export const TextBlockSchema = z.object({ - type: z.literal("text"), - text: z.string(), -}); +// All schemas use .passthrough() to preserve fields PasteGuard doesn't need to inspect +// (e.g. cache_control, citations). Without this, Zod silently strips unknown fields, +// breaking features like Anthropic prompt caching. +export const TextBlockSchema = z + .object({ + type: z.literal("text"), + text: z.string(), + }) + .passthrough(); -export const ImageBlockSchema = z.object({ - type: z.literal("image"), - source: z.object({ - type: z.enum(["base64", "url"]), - media_type: z.string().optional(), - data: z.string().optional(), - url: z.string().optional(), - }), -}); +export const ImageBlockSchema = z + .object({ + type: z.literal("image"), + source: z + .object({ + type: z.enum(["base64", "url"]), + media_type: z.string().optional(), + data: z.string().optional(), + url: z.string().optional(), + }) + .passthrough(), + }) + .passthrough(); -export const ToolUseBlockSchema = z.object({ - type: z.literal("tool_use"), - id: z.string(), - name: z.string(), - input: z.record(z.unknown()), -}); +export const ToolUseBlockSchema = z + .object({ + type: z.literal("tool_use"), + id: z.string(), + name: z.string(), + input: z.record(z.unknown()), + }) + .passthrough(); -export const ThinkingBlockSchema = z.object({ - type: z.literal("thinking"), - thinking: z.string(), - signature: z.string().optional(), -}); +export const ThinkingBlockSchema = z + .object({ + type: z.literal("thinking"), + thinking: z.string(), + signature: z.string().optional(), + }) + .passthrough(); -export const RedactedThinkingBlockSchema = z.object({ - type: z.literal("redacted_thinking"), - data: z.string(), -}); +export const RedactedThinkingBlockSchema = z + .object({ + type: z.literal("redacted_thinking"), + data: z.string(), + }) + .passthrough(); // ToolResultBlock can contain nested content blocks, so we define it with z.any() for content // and provide proper type separately -export const ToolResultBlockSchema = z.object({ - type: z.literal("tool_result"), - tool_use_id: z.string(), - content: z.union([z.string(), z.array(z.any())]), - is_error: z.boolean().optional(), -}); +export const ToolResultBlockSchema = z + .object({ + type: z.literal("tool_result"), + tool_use_id: z.string(), + content: z.union([z.string(), z.array(z.any())]), + is_error: z.boolean().optional(), + }) + .passthrough(); export const ContentBlockSchema = z.discriminatedUnion("type", [ TextBlockSchema, @@ -58,20 +75,26 @@ export const ContentBlockSchema = z.discriminatedUnion("type", [ ]); // Message and request types -export const AnthropicMessageSchema = z.object({ - role: z.enum(["user", "assistant"]), - content: z.union([z.string(), z.array(ContentBlockSchema)]), -}); +export const AnthropicMessageSchema = z + .object({ + role: z.enum(["user", "assistant"]), + content: z.union([z.string(), z.array(ContentBlockSchema)]), + }) + .passthrough(); -export const ToolSchema = z.object({ - name: z.string(), - description: z.string().optional(), - input_schema: z.object({ - type: z.literal("object"), - properties: z.record(z.unknown()).optional(), - required: z.array(z.string()).optional(), - }), -}); +export const ToolSchema = z + .object({ + name: z.string(), + description: z.string().optional(), + input_schema: z + .object({ + type: z.literal("object"), + properties: z.record(z.unknown()).optional(), + required: z.array(z.string()).optional(), + }) + .passthrough(), + }) + .passthrough(); export const AnthropicRequestSchema = z .object({ @@ -85,13 +108,14 @@ export const AnthropicRequestSchema = z type: z.enum(["auto", "any", "tool"]), name: z.string().optional(), }) + .passthrough() .optional(), stream: z.boolean().optional(), temperature: z.number().optional(), top_p: z.number().optional(), top_k: z.number().optional(), stop_sequences: z.array(z.string()).optional(), - metadata: z.object({ user_id: z.string().optional() }).optional(), + metadata: z.object({ user_id: z.string().optional() }).passthrough().optional(), }) .passthrough(); diff --git a/src/providers/openai/types.ts b/src/providers/openai/types.ts index 7500d8d..7c2e28c 100644 --- a/src/providers/openai/types.ts +++ b/src/providers/openai/types.ts @@ -6,16 +6,21 @@ import { z } from "zod"; // Content part for multimodal messages -export const OpenAIContentPartSchema = z.object({ - type: z.string(), - text: z.string().optional(), - image_url: z - .object({ - url: z.string(), - detail: z.string().optional(), - }) - .optional(), -}); +// All schemas use .passthrough() to preserve fields PasteGuard doesn't need to inspect +// (e.g. input_audio, file). Without this, Zod silently strips unknown fields. +export const OpenAIContentPartSchema = z + .object({ + type: z.string(), + text: z.string().optional(), + image_url: z + .object({ + url: z.string(), + detail: z.string().optional(), + }) + .passthrough() + .optional(), + }) + .passthrough(); // Message content: string, array (multimodal), or null export const OpenAIMessageContentSchema = z.union([ @@ -25,10 +30,12 @@ export const OpenAIMessageContentSchema = z.union([ ]); // Chat message -export const OpenAIMessageSchema = z.object({ - role: z.enum(["system", "developer", "user", "assistant", "tool", "function"]), - content: OpenAIMessageContentSchema.optional(), -}); +export const OpenAIMessageSchema = z + .object({ + role: z.enum(["system", "developer", "user", "assistant", "tool", "function"]), + content: OpenAIMessageContentSchema.optional(), + }) + .passthrough(); // Chat completion request - minimal required fields, rest passthrough export const OpenAIRequestSchema = z diff --git a/src/routes/anthropic.test.ts b/src/routes/anthropic.test.ts index cc93dcf..663b6c4 100644 --- a/src/routes/anthropic.test.ts +++ b/src/routes/anthropic.test.ts @@ -1,5 +1,6 @@ import { describe, expect, test } from "bun:test"; import { Hono } from "hono"; +import { AnthropicRequestSchema } from "../providers/anthropic/types"; import { anthropicRoutes } from "./anthropic"; const app = new Hono(); @@ -68,3 +69,82 @@ describe("POST /anthropic/v1/messages", () => { expect(res.status).toBe(400); }); }); + +describe("Zod schema preserves cache_control and unknown fields", () => { + const base = { + model: "claude-3-sonnet-20240229", + max_tokens: 1024, + messages: [{ role: "user", content: "Hello" }], + }; + + test("preserves cache_control on text content block", () => { + const input = { + ...base, + messages: [ + { + role: "user", + content: [{ type: "text", text: "Hello", cache_control: { type: "ephemeral" } }], + }, + ], + }; + + const result = AnthropicRequestSchema.parse(input); + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + const block = (result.messages[0].content as any[])[0]; + + expect(block.cache_control).toEqual({ type: "ephemeral" }); + }); + + test("preserves cache_control on system prompt block", () => { + const input = { + ...base, + system: [{ type: "text", text: "You are helpful.", cache_control: { type: "ephemeral" } }], + }; + + const result = AnthropicRequestSchema.parse(input); + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + const block = (result.system as any[])[0]; + + expect(block.cache_control).toEqual({ type: "ephemeral" }); + }); + + test("preserves cache_control on tool definition", () => { + const input = { + ...base, + tools: [ + { + name: "get_weather", + description: "Get weather", + input_schema: { type: "object", properties: { city: { type: "string" } } }, + cache_control: { type: "ephemeral" }, + }, + ], + }; + + const result = AnthropicRequestSchema.parse(input); + + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + expect((result.tools![0] as any).cache_control).toEqual({ type: "ephemeral" }); + }); + + test("preserves cache_control on message", () => { + const input = { + ...base, + messages: [{ role: "user", content: "Hello", cache_control: { type: "ephemeral" } }], + }; + + const result = AnthropicRequestSchema.parse(input); + + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + expect((result.messages[0] as any).cache_control).toEqual({ type: "ephemeral" }); + }); + + test("preserves unknown top-level fields", () => { + const input = { ...base, custom_field: "preserved" }; + + const result = AnthropicRequestSchema.parse(input); + + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + expect((result as any).custom_field).toBe("preserved"); + }); +}); diff --git a/src/routes/openai.test.ts b/src/routes/openai.test.ts index d7fda9e..3cf49a8 100644 --- a/src/routes/openai.test.ts +++ b/src/routes/openai.test.ts @@ -1,5 +1,6 @@ import { describe, expect, test } from "bun:test"; import { Hono } from "hono"; +import { OpenAIRequestSchema } from "../providers/openai/types"; import { openaiRoutes } from "./openai"; const app = new Hono(); @@ -43,3 +44,76 @@ describe("POST /openai/v1/chat/completions", () => { expect(res.status).toBe(400); }); }); + +describe("Zod schema preserves unknown fields", () => { + const base = { + model: "gpt-4o", + messages: [{ role: "user", content: "Hello" }], + }; + + test("preserves name field on message", () => { + const input = { + ...base, + messages: [{ role: "user", content: "Hello", name: "test_user" }], + }; + + const result = OpenAIRequestSchema.parse(input); + + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + expect((result.messages[0] as any).name).toBe("test_user"); + }); + + test("preserves tool_calls on assistant message", () => { + const input = { + ...base, + messages: [ + { + role: "assistant", + content: null, + tool_calls: [ + { + id: "call_123", + type: "function", + function: { name: "get_weather", arguments: "{}" }, + }, + ], + }, + ], + }; + + const result = OpenAIRequestSchema.parse(input); + + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + expect((result.messages[0] as any).tool_calls).toHaveLength(1); + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + expect((result.messages[0] as any).tool_calls[0].id).toBe("call_123"); + }); + + test("preserves audio content part fields", () => { + const input = { + ...base, + messages: [ + { + role: "user", + content: [{ type: "input_audio", input_audio: { data: "base64...", format: "wav" } }], + }, + ], + }; + + const result = OpenAIRequestSchema.parse(input); + + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + const part = (result.messages[0].content as any[])[0]; + expect(part.type).toBe("input_audio"); + expect(part.input_audio.format).toBe("wav"); + }); + + test("preserves unknown top-level fields", () => { + const input = { ...base, custom_field: "preserved" }; + + const result = OpenAIRequestSchema.parse(input); + + // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation + expect((result as any).custom_field).toBe("preserved"); + }); +});