From: Stefan Gasser Date: Fri, 16 Jan 2026 16:35:42 +0000 (+0100) Subject: Fix partial overlap bug: higher score wins for all overlaps X-Git-Url: http://git.99rst.org/?a=commitdiff_plain;h=2315371dc94950702db27d7d1bb248fccb08c73d;p=sgasser-llm-shield.git Fix partial overlap bug: higher score wins for all overlaps Previous behavior kept both entities when they partially overlapped but had different types, causing text corruption during masking. Now: sort by score desc, then length desc, then start position asc, and keep entities greedily in that order. Any entity that overlaps an already-kept entity is dropped, so the higher-scored entity wins (on equal scores the longer one wins, then the one that starts earlier). --- diff --git a/src/utils/conflict-resolver.test.ts b/src/utils/conflict-resolver.test.ts index ef85a96..ef4fbfe 100644 --- a/src/utils/conflict-resolver.test.ts +++ b/src/utils/conflict-resolver.test.ts @@ -45,14 +45,14 @@ describe("resolveConflicts", () => { expect(result[0].score).toBe(0.85); }); - test("different type contained removed", () => { + test("different type overlapping higher score wins", () => { const entities = [ { start: 0, end: 10, score: 0.7, entity_type: "PHONE_NUMBER" }, { start: 2, end: 8, score: 0.9, entity_type: "US_SSN" }, ]; const result = resolveConflicts(entities); expect(result).toHaveLength(1); - expect(result[0].entity_type).toBe("PHONE_NUMBER"); + expect(result[0].entity_type).toBe("US_SSN"); }); test("same indices different types higher score wins", () => { @@ -65,6 +65,16 @@ describe("resolveConflicts", () => { expect(result[0].entity_type).toBe("EMAIL_ADDRESS"); }); + test("partial overlap different types higher score wins", () => { + const entities = [ + { start: 0, end: 10, score: 0.7, entity_type: "PHONE_NUMBER" }, + { start: 5, end: 15, score: 0.9, entity_type: "EMAIL_ADDRESS" }, + ]; + const result = resolveConflicts(entities); + expect(result).toHaveLength(1); + expect(result[0].entity_type).toBe("EMAIL_ADDRESS"); + }); + test("Eric vs Eric's merged correctly", () => { const entities = [ { start: 6, end: 10, score: 0.85, entity_type: "PERSON" }, diff --git 
a/src/utils/conflict-resolver.ts b/src/utils/conflict-resolver.ts index 87d01be..d9fd04f 100644 --- a/src/utils/conflict-resolver.ts +++ b/src/utils/conflict-resolver.ts @@ -59,20 +59,17 @@ function removeConflicting(entities: T[]): T[] { if (entities.length <= 1) return [...entities]; const sorted = [...entities].sort((a, b) => { - if (a.start !== b.start) return a.start - b.start; - if (a.end !== b.end) return a.end - b.end; - return b.score - a.score; + if (a.score !== b.score) return b.score - a.score; + const aLen = a.end - a.start; + const bLen = b.end - b.start; + if (aLen !== bLen) return bLen - aLen; + return a.start - b.start; }); const result: T[] = []; for (const entity of sorted) { - const hasConflict = result.some((kept) => { - if (entity.start === kept.start && entity.end === kept.end) { - return true; - } - return isContainedIn(entity, kept); - }); + const hasConflict = result.some((kept) => overlaps(entity, kept)); if (!hasConflict) { result.push(entity);