Fix partial overlap bug: higher score wins for all overlaps
author Stefan Gasser <redacted>
Fri, 16 Jan 2026 16:35:42 +0000 (17:35 +0100)
committer Stefan Gasser <redacted>
Fri, 16 Jan 2026 16:35:42 +0000 (17:35 +0100)
Previous behavior kept both entities when they partially overlapped
but had different types, causing text corruption during masking.

Now: sort by score desc, then length desc, then start position. An entity
is dropped if it overlaps any higher-ranked entity that was already kept.

src/utils/conflict-resolver.test.ts
src/utils/conflict-resolver.ts

index ef85a9623840e6ff7f701d61ff7eb4ea4e50bb66..ef4fbfe01ff824b7e3776427477b52fa59de403d 100644 (file)
@@ -45,14 +45,14 @@ describe("resolveConflicts", () => {
     expect(result[0].score).toBe(0.85);
   });
 
-  test("different type contained removed", () => {
+  test("different type overlapping higher score wins", () => {
     const entities = [
       { start: 0, end: 10, score: 0.7, entity_type: "PHONE_NUMBER" },
       { start: 2, end: 8, score: 0.9, entity_type: "US_SSN" },
     ];
     const result = resolveConflicts(entities);
     expect(result).toHaveLength(1);
-    expect(result[0].entity_type).toBe("PHONE_NUMBER");
+    expect(result[0].entity_type).toBe("US_SSN");
   });
 
   test("same indices different types higher score wins", () => {
@@ -65,6 +65,16 @@ describe("resolveConflicts", () => {
     expect(result[0].entity_type).toBe("EMAIL_ADDRESS");
   });
 
+  test("partial overlap different types higher score wins", () => {
+    const entities = [
+      { start: 0, end: 10, score: 0.7, entity_type: "PHONE_NUMBER" },
+      { start: 5, end: 15, score: 0.9, entity_type: "EMAIL_ADDRESS" },
+    ];
+    const result = resolveConflicts(entities);
+    expect(result).toHaveLength(1);
+    expect(result[0].entity_type).toBe("EMAIL_ADDRESS");
+  });
+
   test("Eric vs Eric's merged correctly", () => {
     const entities = [
       { start: 6, end: 10, score: 0.85, entity_type: "PERSON" },
index 87d01bea4d99f0c9d3102da4796bd8d0e146a546..d9fd04f29134fdced0db8d1b29c8419bd5510291 100644 (file)
@@ -59,20 +59,17 @@ function removeConflicting<T extends EntityWithScore>(entities: T[]): T[] {
   if (entities.length <= 1) return [...entities];
 
   const sorted = [...entities].sort((a, b) => {
-    if (a.start !== b.start) return a.start - b.start;
-    if (a.end !== b.end) return a.end - b.end;
-    return b.score - a.score;
+    if (a.score !== b.score) return b.score - a.score;
+    const aLen = a.end - a.start;
+    const bLen = b.end - b.start;
+    if (aLen !== bLen) return bLen - aLen;
+    return a.start - b.start;
   });
 
   const result: T[] = [];
 
   for (const entity of sorted) {
-    const hasConflict = result.some((kept) => {
-      if (entity.start === kept.start && entity.end === kept.end) {
-        return true;
-      }
-      return isContainedIn(entity, kept);
-    });
+    const hasConflict = result.some((kept) => overlaps(entity, kept));
 
     if (!hasConflict) {
       result.push(entity);
git clone https://git.99rst.org/PROJECT