fix: preserve unknown fields in Anthropic schemas (restores prompt caching) (#74)
author raseidl <redacted>
Fri, 27 Feb 2026 18:55:36 +0000 (19:55 +0100)
committer GitHub <redacted>
Fri, 27 Feb 2026 18:55:36 +0000 (19:55 +0100)
* fix: preserve cache_control in Anthropic schemas to restore prompt caching

* fix: add biome-ignore for passthrough tests, extend fix to OpenAI

- Add biome-ignore comments to suppress noExplicitAny in tests
  (required for testing unknown field preservation)
- Add .passthrough() to OpenAI schemas for consistency
  (OpenAIMessageSchema, OpenAIContentPartSchema)
- Format Anthropic schemas to match project style

* test: add OpenAI passthrough tests for consistency with Anthropic

- Schema tests for name, tool_calls, audio content, unknown fields
- Extractor tests for field preservation through applyMasked

---------

Co-authored-by: Stefan Gasser <redacted>
src/masking/extractors/anthropic.test.ts
src/masking/extractors/openai.test.ts
src/providers/anthropic/types.ts
src/providers/openai/types.ts
src/routes/anthropic.test.ts
src/routes/openai.test.ts

index bf16c69514c221598f5cafb76db6ef9b7f95a2e3..cb4990645f92598b3c64bdbb83698078a291b53c 100644 (file)
@@ -737,4 +737,125 @@ describe("Anthropic Text Extractor", () => {
       expect((result.content[0] as { text: string }).text).toBe("No placeholders here");
     });
   });
+
+  describe("cache_control preservation", () => {
+    test("preserves cache_control on text block through applyMasked", () => {
+      const request = createRequest([
+        {
+          role: "user",
+          content: [
+            {
+              type: "text",
+              text: "Contact john@example.com",
+              cache_control: { type: "ephemeral" },
+              // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+            } as any,
+          ],
+        },
+      ]);
+
+      const maskedSpans = [
+        {
+          path: "messages[0].content[0].text",
+          maskedText: "Contact [[EMAIL_ADDRESS_1]]",
+          messageIndex: 0,
+          partIndex: 0,
+        },
+      ];
+
+      const result = anthropicExtractor.applyMasked(request, maskedSpans);
+      // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+      const block = (result.messages[0].content as any[])[0];
+
+      expect(block.text).toBe("Contact [[EMAIL_ADDRESS_1]]");
+      expect(block.cache_control).toEqual({ type: "ephemeral" });
+    });
+
+    test("preserves cache_control on system prompt block through applyMasked", () => {
+      const request = createRequest(
+        [{ role: "user", content: "Hello" }],
+        [
+          {
+            type: "text",
+            text: "You are an assistant. User is John Doe.",
+            cache_control: { type: "ephemeral" },
+            // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+          } as any,
+        ],
+      );
+
+      const maskedSpans = [
+        {
+          path: "system[0].text",
+          maskedText: "You are an assistant. User is [[PERSON_1]].",
+          messageIndex: -1,
+          partIndex: 0,
+        },
+      ];
+
+      const result = anthropicExtractor.applyMasked(request, maskedSpans);
+      // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+      const block = (result.system as any[])[0];
+
+      expect(block.text).toBe("You are an assistant. User is [[PERSON_1]].");
+      expect(block.cache_control).toEqual({ type: "ephemeral" });
+    });
+
+    test("preserves unknown fields on message through applyMasked", () => {
+      const request = createRequest([
+        {
+          role: "user",
+          content: "Hello",
+          extra_field: "preserved",
+          // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+        } as any,
+      ]);
+
+      const maskedSpans = [
+        {
+          path: "messages[0].content",
+          maskedText: "Hello",
+          messageIndex: 0,
+          partIndex: 0,
+        },
+      ];
+
+      const result = anthropicExtractor.applyMasked(request, maskedSpans);
+
+      // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+      expect((result.messages[0] as any).extra_field).toBe("preserved");
+    });
+
+    test("preserves cache_control when no masking is applied", () => {
+      const request = createRequest([
+        {
+          role: "user",
+          content: [
+            {
+              type: "text",
+              text: "No PII here",
+              cache_control: { type: "ephemeral" },
+              // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+            } as any,
+          ],
+        },
+      ]);
+
+      // applyMasked with no-op span (text unchanged)
+      const maskedSpans = [
+        {
+          path: "messages[0].content[0].text",
+          maskedText: "No PII here",
+          messageIndex: 0,
+          partIndex: 0,
+        },
+      ];
+
+      const result = anthropicExtractor.applyMasked(request, maskedSpans);
+      // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+      const block = (result.messages[0].content as any[])[0];
+
+      expect(block.cache_control).toEqual({ type: "ephemeral" });
+    });
+  });
 });
index 9ae4eedd2f4de7dbae28d6b0fc3dc2a5853e83a6..e1078c688cda14f8c1e28109fcb43a35868de1ba 100644 (file)
@@ -299,4 +299,98 @@ describe("OpenAI Text Extractor", () => {
       expect(result.choices[0].message.content).toBeNull();
     });
   });
+
+  describe("unknown field preservation", () => {
+    test("preserves name field on message through applyMasked", () => {
+      const request = createRequest([
+        {
+          role: "user",
+          content: "Contact john@example.com",
+          name: "test_user",
+          // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+        } as any,
+      ]);
+
+      const maskedSpans = [
+        {
+          path: "messages[0].content",
+          maskedText: "Contact [[EMAIL_ADDRESS_1]]",
+          messageIndex: 0,
+          partIndex: 0,
+        },
+      ];
+
+      const result = openaiExtractor.applyMasked(request, maskedSpans);
+
+      // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+      expect((result.messages[0] as any).name).toBe("test_user");
+      expect(result.messages[0].content).toBe("Contact [[EMAIL_ADDRESS_1]]");
+    });
+
+    test("preserves tool_calls on assistant message through applyMasked", () => {
+      const request = createRequest([
+        { role: "user", content: "What is the weather?" },
+        {
+          role: "assistant",
+          content: null,
+          tool_calls: [
+            {
+              id: "call_123",
+              type: "function",
+              function: { name: "get_weather", arguments: "{}" },
+            },
+          ],
+          // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+        } as any,
+      ]);
+
+      const maskedSpans = [
+        {
+          path: "messages[0].content",
+          maskedText: "What is the weather?",
+          messageIndex: 0,
+          partIndex: 0,
+        },
+      ];
+
+      const result = openaiExtractor.applyMasked(request, maskedSpans);
+
+      // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+      expect((result.messages[1] as any).tool_calls).toHaveLength(1);
+      // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+      expect((result.messages[1] as any).tool_calls[0].id).toBe("call_123");
+    });
+
+    test("preserves unknown fields on content part through applyMasked", () => {
+      const request = createRequest([
+        {
+          role: "user",
+          content: [
+            {
+              type: "text",
+              text: "Hello John Doe",
+              custom_field: "preserved",
+              // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+            } as any,
+          ],
+        },
+      ]);
+
+      const maskedSpans = [
+        {
+          path: "messages[0].content[0].text",
+          maskedText: "Hello [[PERSON_1]]",
+          messageIndex: 0,
+          partIndex: 0,
+        },
+      ];
+
+      const result = openaiExtractor.applyMasked(request, maskedSpans);
+
+      // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+      const part = (result.messages[0].content as any[])[0];
+      expect(part.text).toBe("Hello [[PERSON_1]]");
+      expect(part.custom_field).toBe("preserved");
+    });
+  });
 });
index 08956d77df4e59990575d866ddf7375c9613ca1c..69f6d4f50348f99a520ac42dcfddcc1e895cd15c 100644 (file)
@@ -6,47 +6,64 @@
 import { z } from "zod";
 
 // Content block types
-export const TextBlockSchema = z.object({
-  type: z.literal("text"),
-  text: z.string(),
-});
+// All schemas use .passthrough() to preserve fields PasteGuard doesn't need to inspect
+// (e.g. cache_control, citations). Without this, Zod silently strips unknown fields,
+// breaking features like Anthropic prompt caching.
+export const TextBlockSchema = z
+  .object({
+    type: z.literal("text"),
+    text: z.string(),
+  })
+  .passthrough();
 
-export const ImageBlockSchema = z.object({
-  type: z.literal("image"),
-  source: z.object({
-    type: z.enum(["base64", "url"]),
-    media_type: z.string().optional(),
-    data: z.string().optional(),
-    url: z.string().optional(),
-  }),
-});
+export const ImageBlockSchema = z
+  .object({
+    type: z.literal("image"),
+    source: z
+      .object({
+        type: z.enum(["base64", "url"]),
+        media_type: z.string().optional(),
+        data: z.string().optional(),
+        url: z.string().optional(),
+      })
+      .passthrough(),
+  })
+  .passthrough();
 
-export const ToolUseBlockSchema = z.object({
-  type: z.literal("tool_use"),
-  id: z.string(),
-  name: z.string(),
-  input: z.record(z.unknown()),
-});
+export const ToolUseBlockSchema = z
+  .object({
+    type: z.literal("tool_use"),
+    id: z.string(),
+    name: z.string(),
+    input: z.record(z.unknown()),
+  })
+  .passthrough();
 
-export const ThinkingBlockSchema = z.object({
-  type: z.literal("thinking"),
-  thinking: z.string(),
-  signature: z.string().optional(),
-});
+export const ThinkingBlockSchema = z
+  .object({
+    type: z.literal("thinking"),
+    thinking: z.string(),
+    signature: z.string().optional(),
+  })
+  .passthrough();
 
-export const RedactedThinkingBlockSchema = z.object({
-  type: z.literal("redacted_thinking"),
-  data: z.string(),
-});
+export const RedactedThinkingBlockSchema = z
+  .object({
+    type: z.literal("redacted_thinking"),
+    data: z.string(),
+  })
+  .passthrough();
 
 // ToolResultBlock can contain nested content blocks, so we define it with z.any() for content
 // and provide proper type separately
-export const ToolResultBlockSchema = z.object({
-  type: z.literal("tool_result"),
-  tool_use_id: z.string(),
-  content: z.union([z.string(), z.array(z.any())]),
-  is_error: z.boolean().optional(),
-});
+export const ToolResultBlockSchema = z
+  .object({
+    type: z.literal("tool_result"),
+    tool_use_id: z.string(),
+    content: z.union([z.string(), z.array(z.any())]),
+    is_error: z.boolean().optional(),
+  })
+  .passthrough();
 
 export const ContentBlockSchema = z.discriminatedUnion("type", [
   TextBlockSchema,
@@ -58,20 +75,26 @@ export const ContentBlockSchema = z.discriminatedUnion("type", [
 ]);
 
 // Message and request types
-export const AnthropicMessageSchema = z.object({
-  role: z.enum(["user", "assistant"]),
-  content: z.union([z.string(), z.array(ContentBlockSchema)]),
-});
+export const AnthropicMessageSchema = z
+  .object({
+    role: z.enum(["user", "assistant"]),
+    content: z.union([z.string(), z.array(ContentBlockSchema)]),
+  })
+  .passthrough();
 
-export const ToolSchema = z.object({
-  name: z.string(),
-  description: z.string().optional(),
-  input_schema: z.object({
-    type: z.literal("object"),
-    properties: z.record(z.unknown()).optional(),
-    required: z.array(z.string()).optional(),
-  }),
-});
+export const ToolSchema = z
+  .object({
+    name: z.string(),
+    description: z.string().optional(),
+    input_schema: z
+      .object({
+        type: z.literal("object"),
+        properties: z.record(z.unknown()).optional(),
+        required: z.array(z.string()).optional(),
+      })
+      .passthrough(),
+  })
+  .passthrough();
 
 export const AnthropicRequestSchema = z
   .object({
@@ -85,13 +108,14 @@ export const AnthropicRequestSchema = z
         type: z.enum(["auto", "any", "tool"]),
         name: z.string().optional(),
       })
+      .passthrough()
       .optional(),
     stream: z.boolean().optional(),
     temperature: z.number().optional(),
     top_p: z.number().optional(),
     top_k: z.number().optional(),
     stop_sequences: z.array(z.string()).optional(),
-    metadata: z.object({ user_id: z.string().optional() }).optional(),
+    metadata: z.object({ user_id: z.string().optional() }).passthrough().optional(),
   })
   .passthrough();
 
index 7500d8d4329fe27a0254c18708ffe7c7f89fc358..7c2e28c90322d2da7d96b7ad6bb393d81221d9e2 100644 (file)
@@ -6,16 +6,21 @@
 import { z } from "zod";
 
 // Content part for multimodal messages
-export const OpenAIContentPartSchema = z.object({
-  type: z.string(),
-  text: z.string().optional(),
-  image_url: z
-    .object({
-      url: z.string(),
-      detail: z.string().optional(),
-    })
-    .optional(),
-});
+// All schemas use .passthrough() to preserve fields PasteGuard doesn't need to inspect
+// (e.g. input_audio, file). Without this, Zod silently strips unknown fields.
+export const OpenAIContentPartSchema = z
+  .object({
+    type: z.string(),
+    text: z.string().optional(),
+    image_url: z
+      .object({
+        url: z.string(),
+        detail: z.string().optional(),
+      })
+      .passthrough()
+      .optional(),
+  })
+  .passthrough();
 
 // Message content: string, array (multimodal), or null
 export const OpenAIMessageContentSchema = z.union([
@@ -25,10 +30,12 @@ export const OpenAIMessageContentSchema = z.union([
 ]);
 
 // Chat message
-export const OpenAIMessageSchema = z.object({
-  role: z.enum(["system", "developer", "user", "assistant", "tool", "function"]),
-  content: OpenAIMessageContentSchema.optional(),
-});
+export const OpenAIMessageSchema = z
+  .object({
+    role: z.enum(["system", "developer", "user", "assistant", "tool", "function"]),
+    content: OpenAIMessageContentSchema.optional(),
+  })
+  .passthrough();
 
 // Chat completion request - minimal required fields, rest passthrough
 export const OpenAIRequestSchema = z
index cc93dcf46c4cb04ac8b1a79e8b02586dd2ee82f5..663b6c4b482360c0704eb3fa778989257208b7a8 100644 (file)
@@ -1,5 +1,6 @@
 import { describe, expect, test } from "bun:test";
 import { Hono } from "hono";
+import { AnthropicRequestSchema } from "../providers/anthropic/types";
 import { anthropicRoutes } from "./anthropic";
 
 const app = new Hono();
@@ -68,3 +69,82 @@ describe("POST /anthropic/v1/messages", () => {
     expect(res.status).toBe(400);
   });
 });
+
+describe("Zod schema preserves cache_control and unknown fields", () => {
+  const base = {
+    model: "claude-3-sonnet-20240229",
+    max_tokens: 1024,
+    messages: [{ role: "user", content: "Hello" }],
+  };
+
+  test("preserves cache_control on text content block", () => {
+    const input = {
+      ...base,
+      messages: [
+        {
+          role: "user",
+          content: [{ type: "text", text: "Hello", cache_control: { type: "ephemeral" } }],
+        },
+      ],
+    };
+
+    const result = AnthropicRequestSchema.parse(input);
+    // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+    const block = (result.messages[0].content as any[])[0];
+
+    expect(block.cache_control).toEqual({ type: "ephemeral" });
+  });
+
+  test("preserves cache_control on system prompt block", () => {
+    const input = {
+      ...base,
+      system: [{ type: "text", text: "You are helpful.", cache_control: { type: "ephemeral" } }],
+    };
+
+    const result = AnthropicRequestSchema.parse(input);
+    // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+    const block = (result.system as any[])[0];
+
+    expect(block.cache_control).toEqual({ type: "ephemeral" });
+  });
+
+  test("preserves cache_control on tool definition", () => {
+    const input = {
+      ...base,
+      tools: [
+        {
+          name: "get_weather",
+          description: "Get weather",
+          input_schema: { type: "object", properties: { city: { type: "string" } } },
+          cache_control: { type: "ephemeral" },
+        },
+      ],
+    };
+
+    const result = AnthropicRequestSchema.parse(input);
+
+    // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+    expect((result.tools![0] as any).cache_control).toEqual({ type: "ephemeral" });
+  });
+
+  test("preserves cache_control on message", () => {
+    const input = {
+      ...base,
+      messages: [{ role: "user", content: "Hello", cache_control: { type: "ephemeral" } }],
+    };
+
+    const result = AnthropicRequestSchema.parse(input);
+
+    // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+    expect((result.messages[0] as any).cache_control).toEqual({ type: "ephemeral" });
+  });
+
+  test("preserves unknown top-level fields", () => {
+    const input = { ...base, custom_field: "preserved" };
+
+    const result = AnthropicRequestSchema.parse(input);
+
+    // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+    expect((result as any).custom_field).toBe("preserved");
+  });
+});
index d7fda9e89a53fda779d03c120f1f8343b06dfa71..3cf49a87872c270d33acc784f0dc592f0afbaaf5 100644 (file)
@@ -1,5 +1,6 @@
 import { describe, expect, test } from "bun:test";
 import { Hono } from "hono";
+import { OpenAIRequestSchema } from "../providers/openai/types";
 import { openaiRoutes } from "./openai";
 
 const app = new Hono();
@@ -43,3 +44,76 @@ describe("POST /openai/v1/chat/completions", () => {
     expect(res.status).toBe(400);
   });
 });
+
+describe("Zod schema preserves unknown fields", () => {
+  const base = {
+    model: "gpt-4o",
+    messages: [{ role: "user", content: "Hello" }],
+  };
+
+  test("preserves name field on message", () => {
+    const input = {
+      ...base,
+      messages: [{ role: "user", content: "Hello", name: "test_user" }],
+    };
+
+    const result = OpenAIRequestSchema.parse(input);
+
+    // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+    expect((result.messages[0] as any).name).toBe("test_user");
+  });
+
+  test("preserves tool_calls on assistant message", () => {
+    const input = {
+      ...base,
+      messages: [
+        {
+          role: "assistant",
+          content: null,
+          tool_calls: [
+            {
+              id: "call_123",
+              type: "function",
+              function: { name: "get_weather", arguments: "{}" },
+            },
+          ],
+        },
+      ],
+    };
+
+    const result = OpenAIRequestSchema.parse(input);
+
+    // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+    expect((result.messages[0] as any).tool_calls).toHaveLength(1);
+    // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+    expect((result.messages[0] as any).tool_calls[0].id).toBe("call_123");
+  });
+
+  test("preserves audio content part fields", () => {
+    const input = {
+      ...base,
+      messages: [
+        {
+          role: "user",
+          content: [{ type: "input_audio", input_audio: { data: "base64...", format: "wav" } }],
+        },
+      ],
+    };
+
+    const result = OpenAIRequestSchema.parse(input);
+
+    // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+    const part = (result.messages[0].content as any[])[0];
+    expect(part.type).toBe("input_audio");
+    expect(part.input_audio.format).toBe("wav");
+  });
+
+  test("preserves unknown top-level fields", () => {
+    const input = { ...base, custom_field: "preserved" };
+
+    const result = OpenAIRequestSchema.parse(input);
+
+    // biome-ignore lint/suspicious/noExplicitAny: testing unknown field preservation
+    expect((result as any).custom_field).toBe("preserved");
+  });
+});
git clone https://git.99rst.org/PROJECT