From dd11a107ce6f8090e2b270b4536ef52e16c96b73 Mon Sep 17 00:00:00 2001
From: kytimmylai <kytimmylai@gmail.com>
Date: Wed, 15 Jan 2025 22:26:47 +0800
Subject: [PATCH] Prevent duplicate detections caused by the postprocess

---
 ultralytics/utils/ops.py | 58 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 2 deletions(-)

diff --git a/ultralytics/utils/ops.py b/ultralytics/utils/ops.py
index edbb1031..abc3617a 100644
--- a/ultralytics/utils/ops.py
+++ b/ultralytics/utils/ops.py
@@ -848,7 +848,7 @@ def clean_str(s):
     """
     return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
 
-def v10postprocess(preds, max_det, nc=80):
+def origin_v10postprocess(preds, max_det, nc=80):
     assert(4 + nc == preds.shape[-1])
     boxes, scores = preds.split([4, nc], dim=-1)
     max_scores = scores.amax(dim=-1)
@@ -861,4 +861,58 @@ def v10postprocess(preds, max_det, nc=80):
     labels = index % nc
     index = index // nc
     boxes = boxes.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes.shape[-1]))
-    return boxes, scores, labels
\ No newline at end of file
+    return boxes, scores, labels
+
+def v10postprocess(preds, max_det, nc=80):
+    assert(4 + nc == preds.shape[-1])
+    boxes, scores = preds.split([4, nc], dim=-1)
+    max_scores, scores_index = scores.max(dim=-1)
+    instance_cls, instance_index = torch.topk(max_scores, max_det, dim=-1)
+    boxes = torch.gather(boxes, dim=1, index=instance_index.unsqueeze(-1).repeat(1, 1, boxes.shape[-1]))
+    labels = torch.gather(scores_index, dim=1, index=instance_index)
+    scores = instance_cls
+    return boxes, scores, labels
+
+if __name__ == '__main__':
+    """
+    Suppose the model has 4 detection class and we have 3 detections only
+    The original postprocess function produces the following output:
+
+    tensor([[[3.0000, 4.0000, 5.0000, 6.0000, 0.5000, 3.0000],
+             [3.0000, 4.0000, 5.0000, 6.0000, 0.4000, 2.0000], <- duplicated class
+             [2.0000, 3.0000, 4.0000, 5.0000, 0.3100, 3.0000]]])
+
+    Notice how the detection with two high-confidence classes appears twice, overlapping
+    the third detection. The expected output should be:
+
+    tensor([[[3.0000, 4.0000, 5.0000, 6.0000, 0.5000, 3.0000],
+             [2.0000, 3.0000, 4.0000, 5.0000, 0.3100, 3.0000],
+             [1.0000, 2.0000, 3.0000, 4.0000, 0.3000, 3.0000]]]) <-
+
+    which is obtained by the new postprocess function.
+
+    However, while the new postprocess runs faster, it currently yields a lower mAP.
+    It's unclear if this change was intentional or not.
+
+    Below are some benchmark results:
+
+    yolov10n
+    Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
+                   all       5000      36335      0.649      0.483      0.519      0.372
+
+    yolov10s
+    Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 
+                   all       5000      36335      0.706       0.56       0.61      0.449
+    """
+    
+    test_preds = torch.tensor([[
+        [1, 2, 3, 4, 0.2, 0.25, 0.25, 0.3], 
+        [2, 3, 4, 5, 0.2, 0.24, 0.25, 0.31], 
+        [3, 4, 5, 6, 0.05, 0.05, 0.4, 0.5]
+    ]])
+    boxes, scores, labels = origin_v10postprocess(test_preds, max_det=3, nc=4)
+    pred = torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
+    print(pred)
+    boxes, scores, labels = v10postprocess(test_preds, max_det=3, nc=4)
+    pred = torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
+    print(pred)
\ No newline at end of file