From: Stefan Gasser Date: Mon, 12 Jan 2026 07:13:59 +0000 (+0100) Subject: Always run language detection even with single language configured (#26) X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=30a859b52f8dc27c4bfea9f08e40f30e9c58d85f;p=sgasser-llm-shield.git Always run language detection even with single language configured (#26) Previously, language detection was skipped when only one language was configured, and the configured language was returned directly. This made it impossible to detect misconfiguration (e.g., only EN configured but receiving DE text). Now language detection always runs, providing: - Actual detected language in logs (detectedLanguage field) - Confidence score for debugging - usedFallback=true when the detected language isn't configured The performance impact is negligible (~0.01-0.05ms per detection). --- diff --git a/config.example.yaml b/config.example.yaml index 6ada82e..dc2896a 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -51,7 +51,6 @@ pii_detection: # Supported languages for PII detection # Auto-detects language from input text and uses appropriate model - # If only one language is specified, language detection is skipped # # Languages must match what was installed during docker build: # LANGUAGES=en,de docker-compose build diff --git a/src/services/language-detector.ts b/src/services/language-detector.ts index 5991538..4442432 100644 --- a/src/services/language-detector.ts +++ b/src/services/language-detector.ts @@ -51,17 +51,11 @@ export class LanguageDetector { } detect(text: string): LanguageDetectionResult { - if (this.configuredLanguages.length === 1) { - return { - language: this.configuredLanguages[0], - usedFallback: false, - }; - } - const result = eld.detect(text); const detectedIso = result.language; const scores = result.getScores(); const confidence = scores[detectedIso] ?? 
0; + // Use override if exists, otherwise use the detected code as-is (most are 1:1) const presidioLang = (ISO_TO_PRESIDIO_OVERRIDES[detectedIso] || detectedIso) as SupportedLanguage;