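Allows configuring which message roles are scanned for PII and secrets.
By default all roles are scanned (existing behavior). When scan_roles
is set, only messages whose role is listed are scanned: the PII detector
sends only those to Presidio, and the secrets detector runs only on those.
Text spans that carry no role are still scanned.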
Use case: Skip scanning large system prompts to reduce API calls and
avoid false positives on app-controlled content.
- Add role field to TextSpan type
- OpenAI extractor populates role from message
- PII detector filters by scan_roles before Presidio calls
- Secrets detector filters by scan_roles before detection
# - MEDICAL_LICENSE
# - URL
+ # Which message roles to scan for PII (optional)
+ # By default, all roles are scanned. Set this to limit scanning to the listed roles
+ # (names are matched exactly), for example to scan only user-controlled content:
+ # - user: User messages (primary source of PII)
+ # - tool: Tool/function call results (may contain user data)
+ # - function: Legacy function results (OpenAI)
+ # scan_roles:
+ # - user
+ # - tool
+ # - function
+
# Secrets Detection settings (Secrets Shield)
# Detects private keys, API keys, tokens and other secret credentials in requests
secrets_detection:
# Even if logging.log_content is true, secret content is never logged
log_detected_types: true
+ # Which message roles to scan for secrets (optional)
+ # By default, all roles are scanned. Set this to limit scanning to the listed roles
+ # (names are matched exactly), for example to scan only user-controlled content:
+ # - user: User messages (primary source of secrets)
+ # - tool: Tool/function call results (may contain secrets)
+ # - function: Legacy function results (OpenAI)
+ # scan_roles:
+ # - user
+ # - tool
+ # - function
+
# Logging settings
logging:
# SQLite database for request logs
"IP_ADDRESS",
"LOCATION",
]),
+ scan_roles: z.array(z.string()).optional(),
});
const ServerSchema = z.object({
entities: z.array(z.enum(SecretEntityTypes)).default(["OPENSSH_PRIVATE_KEY", "PEM_PRIVATE_KEY"]),
max_scan_chars: z.coerce.number().int().min(0).default(200000),
log_detected_types: z.boolean().default(true),
+ scan_roles: z.array(z.string()).optional(),
});
const ConfigSchema = z
path: "messages[0].content",
messageIndex: 0,
partIndex: 0,
+ role: "system",
});
expect(spans[1]).toEqual({
text: "Hello world",
path: "messages[1].content",
messageIndex: 1,
partIndex: 0,
+ role: "user",
});
});
path: "messages[0].content[0].text",
messageIndex: 0,
partIndex: 0,
+ role: "user",
});
expect(spans[1]).toEqual({
text: "Be detailed",
path: "messages[0].content[2].text",
messageIndex: 0,
partIndex: 2,
+ role: "user",
});
});
expect(spans).toHaveLength(3);
expect(spans[0].messageIndex).toBe(0);
+ expect(spans[0].role).toBe("system");
expect(spans[1].messageIndex).toBe(1);
+ expect(spans[1].role).toBe("user");
expect(spans[2].messageIndex).toBe(2);
+ expect(spans[2].role).toBe("assistant");
});
test("skips null/undefined content", () => {
path: `messages[${msgIdx}].content`,
messageIndex: msgIdx,
partIndex: 0,
+ role: msg.role,
});
continue;
}
path: `messages[${msgIdx}].content[${partIdx}].text`,
messageIndex: msgIdx,
partIndex: partIdx,
+ role: msg.role,
});
}
}
messageIndex: number;
partIndex: number;
nestedPartIndex?: number;
+ role?: string;
}
export interface MaskedSpan {
: { language: config.pii_detection.fallback_language, usedFallback: true };
// Detect PII for each span independently
+ const scanRoles = config.pii_detection.scan_roles
+ ? new Set(config.pii_detection.scan_roles)
+ : null;
+
const spanEntities: PIIEntity[][] = await Promise.all(
spans.map(async (span) => {
+ if (scanRoles && span.role && !scanRoles.has(span.role)) {
+ return [];
+ }
if (!span.text) return [];
return this.detectPII(span.text, langResult.language);
}),
}
// Detect secrets in each span
+ const scanRoles = config.scan_roles ? new Set(config.scan_roles) : null;
+
const matchCounts = new Map<string, number>();
const spanLocations: SecretLocation[][] = spans.map((span) => {
+ if (scanRoles && span.role && !scanRoles.has(span.role)) {
+ return [];
+ }
const result = detectSecrets(span.text, config);
for (const match of result.matches) {
matchCounts.set(match.type, (matchCounts.get(match.type) || 0) + match.count);