Skip to content

Commit 4897402

Browse files
authored
Fix inconsistent NMS implementation between CPU and CUDA (#1556)
* Fix inconsistent NMS implementation
* Improve tests for NMS
* Remove unnecessary using statement
1 parent 8909ff4 commit 4897402

File tree

3 files changed

+12
-6
lines changed

3 files changed

+12
-6
lines changed

test/test_ops.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,26 +1196,33 @@ def reference_nms(self, boxes, scores, iou_threshold):
11961196

11971197
return torch.as_tensor(picked)
11981198

1199-
def _create_tensors(self, N):
1199+
def _create_tensors_with_iou(self, N, iou_thresh):
1200+
# force last box to have a pre-defined iou with the first box
1201+
# let b0 be [x0, y0, x1, y1], and b1 be [x0, y0, x1 + d, y1],
1202+
# then, in order to satisfy ops.iou(b0, b1) == iou_thresh,
1203+
# we need to have d = (x1 - x0) * (1 - iou_thresh) / iou_thresh
12001204
boxes = torch.rand(N, 4) * 100
1201-
boxes[:, 2:] += torch.rand(N, 2) * 100
1205+
boxes[:, 2:] += boxes[:, :2]
1206+
boxes[-1, :] = boxes[0, :]
1207+
x0, y0, x1, y1 = boxes[-1].tolist()
1208+
boxes[-1, 2] += (x1 - x0) * (1 - iou_thresh) / iou_thresh
12021209
scores = torch.rand(N)
12031210
return boxes, scores
12041211

12051212
def test_nms(self):
1206-
boxes, scores = self._create_tensors(1000)
12071213
err_msg = 'NMS incompatible between CPU and reference implementation for IoU={}'
12081214
for iou in [0.2, 0.5, 0.8]:
1215+
boxes, scores = self._create_tensors_with_iou(1000, iou)
12091216
keep_ref = self.reference_nms(boxes, scores, iou)
12101217
keep = ops.nms(boxes, scores, iou)
12111218
self.assertTrue(torch.allclose(keep, keep_ref), err_msg.format(iou))
12121219

12131220
@unittest.skipIf(not torch.cuda.is_available(), "CUDA unavailable")
12141221
def test_nms_cuda(self):
1215-
boxes, scores = self._create_tensors(1000)
12161222
err_msg = 'NMS incompatible between CPU and CUDA for IoU={}'
12171223

12181224
for iou in [0.2, 0.5, 0.8]:
1225+
boxes, scores = self._create_tensors_with_iou(1000, iou)
12191226
r_cpu = ops.nms(boxes, scores, iou)
12201227
r_cuda = ops.nms(boxes.cuda(), scores.cuda(), iou)
12211228

torchvision/csrc/cpu/nms_cpu.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ at::Tensor nms_cpu_kernel(
6161
auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1);
6262
auto inter = w * h;
6363
auto ovr = inter / (iarea + areas[j] - inter);
64-
if (ovr >= iou_threshold)
64+
if (ovr > iou_threshold)
6565
suppressed[j] = 1;
6666
}
6767
}

torchvision/csrc/cuda/nms_cuda.cu

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ __global__ void nms_kernel(
7272
at::Tensor nms_cuda(const at::Tensor& dets,
7373
const at::Tensor& scores,
7474
float iou_threshold) {
75-
using scalar_t = float;
7675
AT_ASSERTM(dets.type().is_cuda(), "dets must be a CUDA tensor");
7776
AT_ASSERTM(scores.type().is_cuda(), "scores must be a CUDA tensor");
7877
at::cuda::CUDAGuard device_guard(dets.device());

0 commit comments

Comments
 (0)