From: Stefan Gasser Date: Fri, 16 Jan 2026 16:14:37 +0000 (+0100) Subject: Clean up comments and rename resolveConflictsSimple to resolveOverlaps X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=abd2c85e68969e56e579432edc0ba4e072496a23;p=sgasser-llm-shield.git Clean up comments and rename resolveConflictsSimple to resolveOverlaps --- diff --git a/src/secrets/redact.ts b/src/secrets/redact.ts index d460a62..9526512 100644 --- a/src/secrets/redact.ts +++ b/src/secrets/redact.ts @@ -1,6 +1,6 @@ import { findPartialPlaceholderStart, generateSecretPlaceholder } from "../constants/placeholders"; import type { ChatCompletionResponse, ChatMessage } from "../services/llm-client"; -import { resolveConflictsSimple } from "../utils/conflict-resolver"; +import { resolveOverlaps } from "../utils/conflict-resolver"; import { extractTextContent } from "../utils/content"; import type { SecretsRedaction } from "./detect"; @@ -67,7 +67,7 @@ export function redactSecrets( } // Resolve conflicts between overlapping redactions - const resolved = resolveConflictsSimple(redactions); + const resolved = resolveOverlaps(redactions); // First pass: sort by start position ascending to assign placeholders in order of appearance const sortedByStart = [...resolved].sort((a, b) => a.start - b.start); diff --git a/src/utils/conflict-resolver.test.ts b/src/utils/conflict-resolver.test.ts index f7c75cf..32b0b36 100644 --- a/src/utils/conflict-resolver.test.ts +++ b/src/utils/conflict-resolver.test.ts @@ -2,7 +2,7 @@ import { describe, expect, test } from "bun:test"; import { type EntityWithScore, resolveConflicts, - resolveConflictsSimple, + resolveOverlaps, } from "./conflict-resolver"; describe("resolveConflicts (Presidio-style)", () => { @@ -124,14 +124,14 @@ describe("resolveConflicts (Presidio-style)", () => { }); }); -describe("resolveConflictsSimple (for secrets without scores)", () => { +describe("resolveOverlaps (for secrets without scores)", () => { test("returns empty array for empty input", () => { - expect(resolveConflictsSimple([])).toEqual([]); + expect(resolveOverlaps([])).toEqual([]); }); test("returns single entity unchanged", () => { const entities = [{ start: 0, end: 5 }]; - expect(resolveConflictsSimple(entities)).toEqual(entities); + expect(resolveOverlaps(entities)).toEqual(entities); }); test("keeps non-overlapping entities", () => { @@ -139,7 +139,7 @@ describe("resolveConflictsSimple (for secrets without scores)", () => { { start: 0, end: 5 }, { start: 10, end: 15 }, ]; - expect(resolveConflictsSimple(entities)).toEqual(entities); + expect(resolveOverlaps(entities)).toEqual(entities); }); test("keeps adjacent entities", () => { @@ -147,7 +147,7 @@ describe("resolveConflictsSimple (for secrets without scores)", () => { { start: 0, end: 4 }, { start: 4, end: 9 }, ]; - expect(resolveConflictsSimple(entities)).toEqual(entities); + expect(resolveOverlaps(entities)).toEqual(entities); }); test("keeps longer when same start position", () => { @@ -155,7 +155,7 @@ describe("resolveConflictsSimple (for secrets without scores)", () => { { start: 6, end: 10 }, { start: 6, end: 12 }, ]; - const result = resolveConflictsSimple(entities); + const result = resolveOverlaps(entities); expect(result).toHaveLength(1); expect(result[0].end).toBe(12); }); @@ -165,7 +165,7 @@ describe("resolveConflictsSimple (for secrets without scores)", () => { { start: 0, end: 10 }, { start: 5, end: 15 }, ]; - const result = resolveConflictsSimple(entities); + const result = resolveOverlaps(entities); expect(result).toHaveLength(1); expect(result[0].start).toBe(0); }); @@ -175,7 +175,7 @@ describe("resolveConflictsSimple (for secrets without scores)", () => { { start: 0, end: 14 }, { start: 4, end: 8 }, ]; - const result = resolveConflictsSimple(entities); + const result = resolveOverlaps(entities); expect(result).toHaveLength(1); expect(result[0].end).toBe(14); }); diff --git a/src/utils/conflict-resolver.ts b/src/utils/conflict-resolver.ts index e4b4138..87d01be 100644 --- a/src/utils/conflict-resolver.ts +++ b/src/utils/conflict-resolver.ts @@ -1,9 +1,5 @@ -/** - * Conflict resolution for overlapping entities - * - * Based on Microsoft Presidio's conflict resolution logic: - * https://github.com/microsoft/presidio/blob/main/presidio-anonymizer/presidio_anonymizer/anonymizer_engine.py - */ +// Conflict resolution based on Microsoft Presidio's logic +// https://github.com/microsoft/presidio/blob/main/presidio-anonymizer/presidio_anonymizer/anonymizer_engine.py export interface EntityWithScore { start: number; @@ -36,9 +32,6 @@ function groupBy(items: T[], keyFn: (item: T) => string): Map { return groups; } -/** - * Merge overlapping intervals. Returns new array (does not mutate input). - */ function mergeOverlapping( intervals: T[], merge: (a: T, b: T) => T, @@ -53,7 +46,6 @@ function mergeOverlapping( const last = result[result.length - 1]; if (overlaps(current, last)) { - // Replace last with merged interval result[result.length - 1] = merge(last, current); } else { result.push(current); @@ -63,13 +55,9 @@ function mergeOverlapping( return result; } -/** - * Remove entities that are contained in another or have same indices with lower score. - */ function removeConflicting(entities: T[]): T[] { if (entities.length <= 1) return [...entities]; - // Sort by start, then by score descending (higher score first) const sorted = [...entities].sort((a, b) => { if (a.start !== b.start) return a.start - b.start; if (a.end !== b.end) return a.end - b.end; @@ -94,12 +82,7 @@ function removeConflicting(entities: T[]): T[] { return result; } -/** - * Resolve conflicts between overlapping entities using Presidio's algorithm. - * - * Phase 1: Merge overlapping entities of the same type (expand boundaries, keep highest score) - * Phase 2: Remove conflicting entities of different types (contained or same indices with lower score) - */ +/** For PII entities with scores. Merges same-type overlaps, removes cross-type conflicts. */ export function resolveConflicts(entities: T[]): T[] { if (entities.length <= 1) return [...entities]; @@ -119,11 +102,8 @@ export function resolveConflicts(entities: T[]): T[] return removeConflicting(afterMerge); } -/** - * Simple overlap resolution for entities without scores. - * Uses length as tiebreaker (longer wins). For secrets detection. - */ -export function resolveConflictsSimple(entities: T[]): T[] { +/** For secrets without scores. Keeps non-overlapping, longer wins ties. */ +export function resolveOverlaps(entities: T[]): T[] { if (entities.length <= 1) return [...entities]; const sorted = [...entities].sort((a, b) => {