Add scan_roles config for role-based PII/secrets filtering (#52)

author Stefan Gasser <redacted>

Tue, 20 Jan 2026 19:07:03 +0000 (20:07 +0100)

committer GitHub <redacted>

Tue, 20 Jan 2026 19:07:03 +0000 (20:07 +0100)
author Stefan Gasser <redacted>
Tue, 20 Jan 2026 19:07:03 +0000 (20:07 +0100)
committer GitHub <redacted>
Tue, 20 Jan 2026 19:07:03 +0000 (20:07 +0100)
diff --git a/config.example.yaml b/config.example.yaml

index 6a660d11bd1667e12824690c6824dbfdbe76c49e..f7c5faa97a68a362e825b00658861480100f9925 100644 (file)
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -85,6 +85,16 @@ pii_detection:
      # - MEDICAL_LICENSE
      # - URL
  
+  # Which message roles to scan for PII (optional)
+  # By default, all roles are scanned. To scan only user-controlled content, list just these roles:
+  #   - user:      User messages (primary source of PII)
+  #   - tool:      Tool/function call results (may contain user data)
+  #   - function:  Legacy function results (OpenAI)
+  # scan_roles:
+  #   - user
+  #   - tool
+  #   - function
+
  # Secrets Detection settings (Secrets Shield)
  # Detects private keys, API keys, tokens and other secret credentials in requests
  secrets_detection:
@@ -138,6 +148,16 @@ secrets_detection:
    # Even if logging.log_content is true, secret content is never logged
    log_detected_types: true
  
+  # Which message roles to scan for secrets (optional)
+  # By default, all roles are scanned. To scan only user-controlled content, list just these roles:
+  #   - user:      User messages (primary source of secrets)
+  #   - tool:      Tool/function call results (may contain secrets)
+  #   - function:  Legacy function results (OpenAI)
+  # scan_roles:
+  #   - user
+  #   - tool
+  #   - function
+
  # Logging settings
  logging:
    # SQLite database for request logs
diff --git a/src/config.ts b/src/config.ts

index 6b338c7a6bcdc54fa39179687277c7709841ad4a..f22f6982b39fa92c2b2e3396e2dc98c7f54bf74a 100644 (file)
--- a/src/config.ts
+++ b/src/config.ts
@@ -54,6 +54,7 @@ const PIIDetectionSchema = z.object({
        "IP_ADDRESS",
        "LOCATION",
      ]),
+  scan_roles: z.array(z.string()).optional(),
  });
  
  const ServerSchema = z.object({
@@ -98,6 +99,7 @@ const SecretsDetectionSchema = z.object({
    entities: z.array(z.enum(SecretEntityTypes)).default(["OPENSSH_PRIVATE_KEY", "PEM_PRIVATE_KEY"]),
    max_scan_chars: z.coerce.number().int().min(0).default(200000),
    log_detected_types: z.boolean().default(true),
+  scan_roles: z.array(z.string()).optional(),
  });
  
  const ConfigSchema = z
diff --git a/src/masking/extractors/openai.test.ts b/src/masking/extractors/openai.test.ts

index 5640a4610ca5244be76e389d78853ac354b7a096..9ae4eedd2f4de7dbae28d6b0fc3dc2a5853e83a6 100644 (file)
--- a/src/masking/extractors/openai.test.ts
+++ b/src/masking/extractors/openai.test.ts
@@ -24,12 +24,14 @@ describe("OpenAI Text Extractor", () => {
          path: "messages[0].content",
          messageIndex: 0,
          partIndex: 0,
+        role: "system",
        });
        expect(spans[1]).toEqual({
          text: "Hello world",
          path: "messages[1].content",
          messageIndex: 1,
          partIndex: 0,
+        role: "user",
        });
      });
  
@@ -53,12 +55,14 @@ describe("OpenAI Text Extractor", () => {
          path: "messages[0].content[0].text",
          messageIndex: 0,
          partIndex: 0,
+        role: "user",
        });
        expect(spans[1]).toEqual({
          text: "Be detailed",
          path: "messages[0].content[2].text",
          messageIndex: 0,
          partIndex: 2,
+        role: "user",
        });
      });
  
@@ -76,8 +80,11 @@ describe("OpenAI Text Extractor", () => {
  
        expect(spans).toHaveLength(3);
        expect(spans[0].messageIndex).toBe(0);
+      expect(spans[0].role).toBe("system");
        expect(spans[1].messageIndex).toBe(1);
+      expect(spans[1].role).toBe("user");
        expect(spans[2].messageIndex).toBe(2);
+      expect(spans[2].role).toBe("assistant");
      });
  
      test("skips null/undefined content", () => {
diff --git a/src/masking/extractors/openai.ts b/src/masking/extractors/openai.ts

index 4cca57eee5e86f3d13a6197443c7622a189d3073..b03212be5e8d7cb6005116718ff46282a735e0ed 100644 (file)
--- a/src/masking/extractors/openai.ts
+++ b/src/masking/extractors/openai.ts
@@ -33,6 +33,7 @@ export const openaiExtractor: RequestExtractor<OpenAIRequest, OpenAIResponse> =
            path: `messages[${msgIdx}].content`,
            messageIndex: msgIdx,
            partIndex: 0,
+          role: msg.role,
          });
          continue;
        }
@@ -46,6 +47,7 @@ export const openaiExtractor: RequestExtractor<OpenAIRequest, OpenAIResponse> =
                path: `messages[${msgIdx}].content[${partIdx}].text`,
                messageIndex: msgIdx,
                partIndex: partIdx,
+              role: msg.role,
              });
            }
          }
diff --git a/src/masking/types.ts b/src/masking/types.ts

index e63c84d693945a6a1e6fd6349a9c1059f00a6fbd..a68869a12aae62a6f5fbd8a525bfcf3101f36746 100644 (file)
--- a/src/masking/types.ts
+++ b/src/masking/types.ts
@@ -10,6 +10,7 @@ export interface TextSpan {
    messageIndex: number;
    partIndex: number;
    nestedPartIndex?: number;
+  role?: string;
  }
  
  export interface MaskedSpan {
diff --git a/src/pii/detect.ts b/src/pii/detect.ts

index ecd9bff6a71a2b2737e9a85f06d73d78d3f6f04f..eb07a448ebd1628e8d50207f0a99dff0f752db1e 100644 (file)
--- a/src/pii/detect.ts
+++ b/src/pii/detect.ts
@@ -100,8 +100,15 @@ export class PIIDetector {
        : { language: config.pii_detection.fallback_language, usedFallback: true };
  
      // Detect PII for each span independently
+    const scanRoles = config.pii_detection.scan_roles
+      ? new Set(config.pii_detection.scan_roles)
+      : null;
+
      const spanEntities: PIIEntity[][] = await Promise.all(
        spans.map(async (span) => {
+        if (scanRoles && span.role && !scanRoles.has(span.role)) {
+          return [];
+        }
          if (!span.text) return [];
          return this.detectPII(span.text, langResult.language);
        }),
diff --git a/src/secrets/detect.ts b/src/secrets/detect.ts

index 35bfc058ee595c21397d868720d34d031ac52447..2f2a760e1a74f991c4bb8aa20d7ff5b08c8fd8e1 100644 (file)
--- a/src/secrets/detect.ts
+++ b/src/secrets/detect.ts
@@ -95,8 +95,13 @@ export function detectSecretsInSpans(
    }
  
    // Detect secrets in each span
+  const scanRoles = config.scan_roles ? new Set(config.scan_roles) : null;
+
    const matchCounts = new Map<string, number>();
    const spanLocations: SecretLocation[][] = spans.map((span) => {
+    if (scanRoles && span.role && !scanRoles.has(span.role)) {
+      return [];
+    }
      const result = detectSecrets(span.text, config);
      for (const match of result.matches) {
        matchCounts.set(match.type, (matchCounts.get(match.type) || 0) + match.count);
author	Stefan Gasser <redacted>
	Tue, 20 Jan 2026 19:07:03 +0000 (20:07 +0100)
committer	GitHub <redacted>
	Tue, 20 Jan 2026 19:07:03 +0000 (20:07 +0100)
config.example.yaml		patch \| blob \| history
src/config.ts		patch \| blob \| history
src/masking/extractors/openai.test.ts		patch \| blob \| history
src/masking/extractors/openai.ts		patch \| blob \| history
src/masking/types.ts		patch \| blob \| history
src/pii/detect.ts		patch \| blob \| history
src/secrets/detect.ts		patch \| blob \| history