Previously, language detection was skipped when only one language was
configured, returning the configured language directly. This made it
impossible to detect a misconfiguration (e.g., only EN is configured but
DE text is received).
Now language detection always runs, providing:
- Actual detected language in logs (detectedLanguage field)
- Confidence score for debugging
- usedFallback=true when the detected language isn't configured
Performance impact is negligible (~0.01-0.05ms per detection).
# Supported languages for PII detection
# Auto-detects the language of the input text and uses the matching model
- # If only one language is specified, language detection is skipped
#
# Languages must match what was installed during docker build:
# LANGUAGES=en,de docker-compose build
}
detect(text: string): LanguageDetectionResult {
- if (this.configuredLanguages.length === 1) {
- return {
- language: this.configuredLanguages[0],
- usedFallback: false,
- };
- }
-
const result = eld.detect(text);
const detectedIso = result.language;
const scores = result.getScores();
const confidence = scores[detectedIso] ?? 0;
+
// Use override if exists, otherwise use the detected code as-is (most are 1:1)
const presidioLang = (ISO_TO_PRESIDIO_OVERRIDES[detectedIso] ||
detectedIso) as SupportedLanguage;