From 135763c010b9816d6c5efce457e5a32aefdb1ee0 Mon Sep 17 00:00:00 2001 From: Yassine Alouini Date: Thu, 7 Apr 2022 16:20:07 +0200 Subject: [PATCH 01/16] [FEAT] Add distance IoU and distance IoU loss + some tests (WIP for tests). --- docs/source/ops.rst | 2 + test/test_ops.py | 38 +++++++++++++++ torchvision/ops/__init__.py | 4 ++ torchvision/ops/boxes.py | 44 ++++++++++++++++++ torchvision/ops/diou_loss.py | 90 ++++++++++++++++++++++++++++++++++++ 5 files changed, 178 insertions(+) create mode 100644 torchvision/ops/diou_loss.py diff --git a/docs/source/ops.rst b/docs/source/ops.rst index 3e996ecd6c8..5915b07e716 100644 --- a/docs/source/ops.rst +++ b/docs/source/ops.rst @@ -24,6 +24,8 @@ Operators drop_block3d generalized_box_iou generalized_box_iou_loss + distance_box_iou + distance_box_iou_loss masks_to_boxes nms ps_roi_align diff --git a/test/test_ops.py b/test/test_ops.py index ad9aaefee52..279e959f033 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1256,6 +1256,44 @@ def test_gen_iou(self, test_input: List, dtypes: List[torch.dtype], tolerance: f def test_giou_jit(self) -> None: self._run_jit_test([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]]) +class TestDistanceIoU(BoxTestBase): + def _target_fn(self) -> Tuple[bool, Callable]: + return (True, ops.distance_box_iou) + + def _generate_int_input() -> List[List[int]]: + return [[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]] + + # TODO: Update this. + def _generate_int_expected() -> List[List[float]]: + return [[1.0, 0.25, -0.7778], [0.25, 1.0, -0.8611], [-0.7778, -0.8611, 1.0]] + + def _generate_float_input() -> List[List[float]]: + return [ + [285.3538, 185.5758, 1193.5110, 851.4551], + [285.1472, 188.7374, 1192.4984, 851.0669], + [279.2440, 197.9812, 1189.4746, 849.2019], + ] + + def _generate_float_expected() -> List[List[float]]: + return [[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]] + + @pytest.mark.parametrize( + "test_input, dtypes, tolerance, expected", + [ + # TODO: Fix this test. + # pytest.param( + # _generate_int_input(), [torch.int16, torch.int32, torch.int64], 1e-4, _generate_int_expected() + # ), + pytest.param(_generate_float_input(), [torch.float16], 0.002, _generate_float_expected()), + pytest.param(_generate_float_input(), [torch.float32, torch.float64], 0.001, _generate_float_expected()), + ], + ) + def test_distance_iou(self, test_input: List, dtypes: List[torch.dtype], tolerance: float, expected: List) -> None: + self._run_test(test_input, dtypes, tolerance, expected) + + def test_distance_iou_jit(self) -> None: + self._run_jit_test([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]]) + class TestMasksToBoxes: def test_masks_box(self): diff --git a/torchvision/ops/__init__.py b/torchvision/ops/__init__.py index ceb78250415..955159dc5cc 100644 --- a/torchvision/ops/__init__.py +++ b/torchvision/ops/__init__.py @@ -7,6 +7,7 @@ box_area, box_iou, generalized_box_iou, + distance_box_iou, masks_to_boxes, ) from .boxes import box_convert @@ -15,6 +16,7 @@ from .feature_pyramid_network import FeaturePyramidNetwork from .focal_loss import sigmoid_focal_loss from .giou_loss import generalized_box_iou_loss +from .diou_loss import distance_box_iou_loss from .misc import FrozenBatchNorm2d, Conv2dNormActivation, Conv3dNormActivation, SqueezeExcitation from .poolers import MultiScaleRoIAlign from .ps_roi_align import ps_roi_align, PSRoIAlign @@ -38,6 +40,7 @@ "box_area", "box_iou", "generalized_box_iou", + "distance_box_iou", "roi_align", "RoIAlign", "roi_pool", @@ -56,6 +59,7 @@ "Conv3dNormActivation", "SqueezeExcitation", "generalized_box_iou_loss", + "distance_box_iou_loss", "drop_block2d", "DropBlock2d", "drop_block3d", diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py index 23c1001438c..b456096042f 100644 --- a/torchvision/ops/boxes.py +++ b/torchvision/ops/boxes.py @@ -310,6 +310,50 @@ def generalized_box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor: return iou - (areai - union) / areai +# Implementation inspired from the generalized_box_iou one. +# TODO: Some refactoring and homogenization could be done with +# the loss function in diou_loss. +def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps:float= 1e-7) -> Tensor: + """ + Return distance intersection-over-union (Jaccard index) between two sets of boxes. + + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``. + + Args: + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[M, 4]): second set of boxes + eps (float, optional): small number to prevent division by zero. Default: 1e-7 + + Returns: + Tensor[N, M]: the NxM matrix containing the pairwise distance IoU values + for every element in boxes1 and boxes2 + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(distance_box_iou) + + inter, union = _box_inter_union(boxes1, boxes2) + iou = inter / union + + lti = torch.min(boxes1[:, None, :2], boxes2[:, :2]) + rbi = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) + + whi = _upcast(rbi - lti).clamp(min=0) # [N,M,2] + diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps + + + # centers of boxes + x_p = boxes1[:, None, :2].sum() / 2 + y_p = boxes1[:, None, 2:].sum() / 2 + x_g = boxes2[:, :2].sum() / 2 + y_g = boxes2[:, 2:].sum() / 2 + # The distance between boxes' centers squared. + centers_distance_squared = (_upcast(x_p - x_g) ** 2) + (_upcast(y_p - y_g) ** 2) + + # The distance IoU is the IoU penalized by a normalized + # distance between boxes' centers squared. + return iou - (centers_distance_squared / diagonal_distance_squared) + def masks_to_boxes(masks: torch.Tensor) -> torch.Tensor: """ diff --git a/torchvision/ops/diou_loss.py b/torchvision/ops/diou_loss.py new file mode 100644 index 00000000000..645be27909e --- /dev/null +++ b/torchvision/ops/diou_loss.py @@ -0,0 +1,90 @@ + +import torch +import math +from .boxes import _upcast +from ..utils import _log_api_usage_once + +# TODO: Some parts can be refactored between gIoU, cIoU, and dIoU. +def distance_box_iou_loss( + boxes1: torch.Tensor, + boxes2: torch.Tensor, + reduction: str = "none", + eps: float = 1e-7, +) -> torch.Tensor: + """ + Original implementation from: + https://github.com/facebookresearch/detectron2/blob/dfe8d368c8b7cc2be42c5c3faf9bdcc3c08257b1/detectron2/layers/losses.py#L66 + + Gradient-friendly IoU loss with an additional penalty that is non-zero when the + distance between boxes' centers isn't zero. Indeed, for two exactly overlapping + boxes, the distance IoU is the same as the IoU loss. + This loss is symmetric, so the boxes1 and boxes2 arguments are interchangeable. + + Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with + ``0 <= x1 < x2`` and ``0 <= y1 < y2``, and The two boxes should have the + same dimensions. + + Args: + boxes1 (Tensor[N, 4] or Tensor[4]): first set of boxes + boxes2 (Tensor[N, 4] or Tensor[4]): second set of boxes + reduction (string, optional): Specifies the reduction to apply to the output: + ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: No reduction will be + applied to the output. ``'mean'``: The output will be averaged. + ``'sum'``: The output will be summed. Default: ``'none'`` + eps (float, optional): small number to prevent division by zero. Default: 1e-7 + Reference: + Zhaohui Zheng et. al: Distance Intersection over Union Loss: + https://arxiv.org/abs/1911.08287 + + """ + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(distance_box_iou_loss) + + boxes1 = _upcast(boxes1) + boxes2 = _upcast(boxes2) + x1, y1, x2, y2 = boxes1.unbind(dim=-1) + x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) + + if (x2 < x1).all(): + raise ValueError("x1 is larger than x2") + if (y2 < y1).all(): + raise ValueError("y1 is larger than y2") + + # Intersection keypoints + xkis1 = torch.max(x1, x1g) + ykis1 = torch.max(y1, y1g) + xkis2 = torch.min(x2, x2g) + ykis2 = torch.min(y2, y2g) + + intsct = torch.zeros_like(x1) + mask = (ykis2 > ykis1) & (xkis2 > xkis1) + intsct[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask]) + union = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsct + eps + iou = intsct / union + + # smallest enclosing box + xc1 = torch.min(x1, x1g) + yc1 = torch.min(y1, y1g) + xc2 = torch.max(x2, x2g) + yc2 = torch.max(y2, y2g) + # The diagonal distance of the smallest enclosing box squared + diagonal_distance_squared = ((xc2 - xc1) ** 2) + ((yc2 - yc1) ** 2) + eps + + # centers of boxes + x_p = (x2 + x1) / 2 + y_p = (y2 + y1) / 2 + x_g = (x1g + x2g) / 2 + y_g = (y1g + y2g) / 2 + # The distance between boxes' centers squared. + centers_distance_squared = ((x_p - x_g) ** 2) + ((y_p - y_g) ** 2) + + # The distance IoU is the IoU penalized by a normalized + # distance between boxes' centers squared. + diou = iou - (centers_distance_squared / diagonal_distance_squared) + loss = 1 - diou + if reduction == "mean": + loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum() + elif reduction == "sum": + loss = loss.sum() + + return loss \ No newline at end of file From ec599d21c9d170f8fa3363f694bf5cdfc72acd12 Mon Sep 17 00:00:00 2001 From: Yassine Alouini Date: Thu, 7 Apr 2022 17:16:04 +0200 Subject: [PATCH 02/16] [FIX] Remove URL from docstring + remove assert since it causes a big performance drop. --- torchvision/ops/diou_loss.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/torchvision/ops/diou_loss.py b/torchvision/ops/diou_loss.py index 645be27909e..3fbdf61e5b5 100644 --- a/torchvision/ops/diou_loss.py +++ b/torchvision/ops/diou_loss.py @@ -12,9 +12,6 @@ def distance_box_iou_loss( eps: float = 1e-7, ) -> torch.Tensor: """ - Original implementation from: - https://github.com/facebookresearch/detectron2/blob/dfe8d368c8b7cc2be42c5c3faf9bdcc3c08257b1/detectron2/layers/losses.py#L66 - Gradient-friendly IoU loss with an additional penalty that is non-zero when the distance between boxes' centers isn't zero. Indeed, for two exactly overlapping boxes, the distance IoU is the same as the IoU loss. @@ -37,6 +34,8 @@ def distance_box_iou_loss( https://arxiv.org/abs/1911.08287 """ + # Original implementation from: + # https://github.com/facebookresearch/detectron2/blob/dfe8d368c8b7cc2be42c5c3faf9bdcc3c08257b1/detectron2/layers/losses.py#L5 if not torch.jit.is_scripting() and not torch.jit.is_tracing(): _log_api_usage_once(distance_box_iou_loss) @@ -45,11 +44,6 @@ def distance_box_iou_loss( x1, y1, x2, y2 = boxes1.unbind(dim=-1) x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) - if (x2 < x1).all(): - raise ValueError("x1 is larger than x2") - if (y2 < y1).all(): - raise ValueError("y1 is larger than y2") - # Intersection keypoints xkis1 = torch.max(x1, x1g) ykis1 = torch.max(y1, y1g) From 41703e696191f640e38f2e6ac272ad9d556e3318 Mon Sep 17 00:00:00 2001 From: Yassine Alouini Date: Thu, 7 Apr 2022 17:17:29 +0200 Subject: [PATCH 03/16] [FIX] eps isn't None. --- torchvision/ops/giou_loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/ops/giou_loss.py b/torchvision/ops/giou_loss.py index c43a788063e..4d6f946f5e8 100644 --- a/torchvision/ops/giou_loss.py +++ b/torchvision/ops/giou_loss.py @@ -36,7 +36,7 @@ def generalized_box_iou_loss( ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: No reduction will be applied to the output. ``'mean'``: The output will be averaged. ``'sum'``: The output will be summed. Default: ``'none'`` - eps (float, optional): small number to prevent division by zero. Default: 1e-7 + eps (float): small number to prevent division by zero. Default: 1e-7 Reference: Hamid Rezatofighi et. al: Generalized Intersection over Union: From 51616ed5087aa041b815ec0de9f513610d45c35f Mon Sep 17 00:00:00 2001 From: Yassine Alouini Date: Wed, 13 Apr 2022 16:51:21 +0200 Subject: [PATCH 04/16] [TEST] Update existing box dIoU test + add dIoU loss tests (inspired from cIoU ones). --- test/test_ops.py | 54 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 279e959f033..5d2338755a8 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1126,6 +1126,8 @@ def _perform_box_operation(self, box: Tensor, run_as_script: bool = False) -> Te def _run_test(self, test_input: List, dtypes: List[torch.dtype], tolerance: float, expected: List) -> None: def assert_close(box: Tensor, expected: Tensor, tolerance): out = self._perform_box_operation(box) + print("The computed box is: ", out) + print("The expected one is: ", expected) torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance) for dtype in dtypes: @@ -1256,7 +1258,7 @@ def test_gen_iou(self, test_input: List, dtypes: List[torch.dtype], tolerance: f def test_giou_jit(self) -> None: self._run_jit_test([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]]) -class TestDistanceIoU(BoxTestBase): +class TestDistanceBoxIoU(BoxTestBase): def _target_fn(self) -> Tuple[bool, Callable]: return (True, ops.distance_box_iou) @@ -1265,7 +1267,7 @@ def _generate_int_input() -> List[List[int]]: # TODO: Update this. def _generate_int_expected() -> List[List[float]]: - return [[1.0, 0.25, -0.7778], [0.25, 1.0, -0.8611], [-0.7778, -0.8611, 1.0]] + return [[1.0, 0.25, 0.0], [0.25, 1.0, 0.0], [0.0, 0.0, 1.0]] def _generate_float_input() -> List[List[float]]: return [ @@ -1280,10 +1282,9 @@ def _generate_float_expected() -> List[List[float]]: @pytest.mark.parametrize( "test_input, dtypes, tolerance, expected", [ - # TODO: Fix this test. - # pytest.param( - # _generate_int_input(), [torch.int16, torch.int32, torch.int64], 1e-4, _generate_int_expected() - # ), + pytest.param( + _generate_int_input(), [torch.int16, torch.int32, torch.int64], 1e-4, _generate_int_expected() + ), pytest.param(_generate_float_input(), [torch.float16], 0.002, _generate_float_expected()), pytest.param(_generate_float_input(), [torch.float32, torch.float64], 0.001, _generate_float_expected()), ], @@ -1294,6 +1295,47 @@ def test_distance_iou(self, test_input: List, dtypes: List[torch.dtype], toleran def test_distance_iou_jit(self) -> None: self._run_jit_test([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]]) +class TestDistanceIoULoss: + # Inspired and adapted from: + # https://github.com/pytorch/vision/pull/5776/files#diff-d183f2afc51d6a59bc70094e8f476d2468c45e415500f6eb60abad955e065156 + + @staticmethod + def assert_distance_iou_loss(box1, box2, expected_output, dtype, reduction="none"): + output = ops.distance_box_iou_loss(box1, box2, reduction=reduction) + expected_output = torch.tensor(expected_output, dtype=dtype) + tol = 1e-5 if dtype != torch.half else 1e-3 + torch.testing.assert_close(output, expected_output, rtol=tol, atol=tol) + + # TODO: torch.half as a dtype doesn't pass the test, investigate... + @pytest.mark.parametrize("dtype", [torch.float32]) + @pytest.mark.parametrize("device", cpu_and_gpu()) + def test_distance_iou_loss(self, dtype, device): + box1 = torch.tensor([-1, -1, 1, 1], dtype=dtype, device=device) + box2 = torch.tensor([0, 0, 1, 1], dtype=dtype, device=device) + box3 = torch.tensor([0, 1, 1, 2], dtype=dtype, device=device) + box4 = torch.tensor([1, 1, 2, 2], dtype=dtype, device=device) + + box1s = torch.stack( + [box2, box2], + dim=0, + ) + box2s = torch.stack( + [box3, box4], + dim=0, + ) + + + self.assert_distance_iou_loss(box1, box1, 0.0, dtype) + + self.assert_distance_iou_loss(box1, box2, 0.8125, dtype) + + self.assert_distance_iou_loss(box1, box3, 1.1923, dtype) + + self.assert_distance_iou_loss(box1, box4, 1.2500, dtype) + + self.assert_distance_iou_loss(box1s, box2s, 1.2250, dtype, reduction="mean") + self.assert_distance_iou_loss(box1s, box2s, 2.4500, dtype, reduction="sum") + class TestMasksToBoxes: def test_masks_box(self): From 7631ab7ab977a0992595bca92934b3826a3eb22a Mon Sep 17 00:00:00 2001 From: Yassine Alouini Date: Wed, 13 Apr 2022 17:24:35 +0200 Subject: [PATCH 05/16] [ENH] Some pre-commit fixes + remove print + mypy. --- test/test_ops.py | 11 +++++------ torchvision/ops/__init__.py | 2 +- torchvision/ops/boxes.py | 14 +++++++------- torchvision/ops/diou_loss.py | 18 +++++++++--------- 4 files changed, 22 insertions(+), 23 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 5d2338755a8..386e214017e 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1126,8 +1126,6 @@ def _perform_box_operation(self, box: Tensor, run_as_script: bool = False) -> Te def _run_test(self, test_input: List, dtypes: List[torch.dtype], tolerance: float, expected: List) -> None: def assert_close(box: Tensor, expected: Tensor, tolerance): out = self._perform_box_operation(box) - print("The computed box is: ", out) - print("The expected one is: ", expected) torch.testing.assert_close(out, expected, rtol=0.0, check_dtype=False, atol=tolerance) for dtype in dtypes: @@ -1258,6 +1256,7 @@ def test_gen_iou(self, test_input: List, dtypes: List[torch.dtype], tolerance: f def test_giou_jit(self) -> None: self._run_jit_test([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]]) + class TestDistanceBoxIoU(BoxTestBase): def _target_fn(self) -> Tuple[bool, Callable]: return (True, ops.distance_box_iou) @@ -1265,7 +1264,7 @@ def _target_fn(self) -> Tuple[bool, Callable]: def _generate_int_input() -> List[List[int]]: return [[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]] - # TODO: Update this. + # TODO: Update this. def _generate_int_expected() -> List[List[float]]: return [[1.0, 0.25, 0.0], [0.25, 1.0, 0.0], [0.0, 0.0, 1.0]] @@ -1295,8 +1294,9 @@ def test_distance_iou(self, test_input: List, dtypes: List[torch.dtype], toleran def test_distance_iou_jit(self) -> None: self._run_jit_test([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]]) + class TestDistanceIoULoss: - # Inspired and adapted from: + # Inspired and adapted from: # https://github.com/pytorch/vision/pull/5776/files#diff-d183f2afc51d6a59bc70094e8f476d2468c45e415500f6eb60abad955e065156 @staticmethod @@ -1306,7 +1306,7 @@ def assert_distance_iou_loss(box1, box2, expected_output, dtype, reduction="none tol = 1e-5 if dtype != torch.half else 1e-3 torch.testing.assert_close(output, expected_output, rtol=tol, atol=tol) - # TODO: torch.half as a dtype doesn't pass the test, investigate... + # TODO: torch.half as a dtype doesn't pass the test, investigate... @pytest.mark.parametrize("dtype", [torch.float32]) @pytest.mark.parametrize("device", cpu_and_gpu()) def test_distance_iou_loss(self, dtype, device): @@ -1324,7 +1324,6 @@ def test_distance_iou_loss(self, dtype, device): dim=0, ) - self.assert_distance_iou_loss(box1, box1, 0.0, dtype) self.assert_distance_iou_loss(box1, box2, 0.8125, dtype) diff --git a/torchvision/ops/__init__.py b/torchvision/ops/__init__.py index 955159dc5cc..5292b778f51 100644 --- a/torchvision/ops/__init__.py +++ b/torchvision/ops/__init__.py @@ -12,11 +12,11 @@ ) from .boxes import box_convert from .deform_conv import deform_conv2d, DeformConv2d +from .diou_loss import distance_box_iou_loss from .drop_block import drop_block2d, DropBlock2d, drop_block3d, DropBlock3d from .feature_pyramid_network import FeaturePyramidNetwork from .focal_loss import sigmoid_focal_loss from .giou_loss import generalized_box_iou_loss -from .diou_loss import distance_box_iou_loss from .misc import FrozenBatchNorm2d, Conv2dNormActivation, Conv3dNormActivation, SqueezeExcitation from .poolers import MultiScaleRoIAlign from .ps_roi_align import ps_roi_align, PSRoIAlign diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py index b456096042f..3ac3a7f3233 100644 --- a/torchvision/ops/boxes.py +++ b/torchvision/ops/boxes.py @@ -310,10 +310,11 @@ def generalized_box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor: return iou - (areai - union) / areai + # Implementation inspired from the generalized_box_iou one. # TODO: Some refactoring and homogenization could be done with -# the loss function in diou_loss. -def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps:float= 1e-7) -> Tensor: +# the loss function in diou_loss. +def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tensor: """ Return distance intersection-over-union (Jaccard index) between two sets of boxes. @@ -339,19 +340,18 @@ def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps:float= 1e-7) -> Tensor: rbi = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) whi = _upcast(rbi - lti).clamp(min=0) # [N,M,2] - diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps - + diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps # centers of boxes x_p = boxes1[:, None, :2].sum() / 2 y_p = boxes1[:, None, 2:].sum() / 2 x_g = boxes2[:, :2].sum() / 2 y_g = boxes2[:, 2:].sum() / 2 - # The distance between boxes' centers squared. + # The distance between boxes' centers squared. centers_distance_squared = (_upcast(x_p - x_g) ** 2) + (_upcast(y_p - y_g) ** 2) - # The distance IoU is the IoU penalized by a normalized - # distance between boxes' centers squared. + # The distance IoU is the IoU penalized by a normalized + # distance between boxes' centers squared. return iou - (centers_distance_squared / diagonal_distance_squared) diff --git a/torchvision/ops/diou_loss.py b/torchvision/ops/diou_loss.py index 3fbdf61e5b5..766c8ac6f03 100644 --- a/torchvision/ops/diou_loss.py +++ b/torchvision/ops/diou_loss.py @@ -1,10 +1,10 @@ - import torch -import math -from .boxes import _upcast + from ..utils import _log_api_usage_once +from .boxes import _upcast + -# TODO: Some parts can be refactored between gIoU, cIoU, and dIoU. +# TODO: Some parts can be refactored between gIoU, cIoU, and dIoU. def distance_box_iou_loss( boxes1: torch.Tensor, boxes2: torch.Tensor, @@ -61,7 +61,7 @@ def distance_box_iou_loss( yc1 = torch.min(y1, y1g) xc2 = torch.max(x2, x2g) yc2 = torch.max(y2, y2g) - # The diagonal distance of the smallest enclosing box squared + # The diagonal distance of the smallest enclosing box squared diagonal_distance_squared = ((xc2 - xc1) ** 2) + ((yc2 - yc1) ** 2) + eps # centers of boxes @@ -69,11 +69,11 @@ def distance_box_iou_loss( y_p = (y2 + y1) / 2 x_g = (x1g + x2g) / 2 y_g = (y1g + y2g) / 2 - # The distance between boxes' centers squared. + # The distance between boxes' centers squared. centers_distance_squared = ((x_p - x_g) ** 2) + ((y_p - y_g) ** 2) - # The distance IoU is the IoU penalized by a normalized - # distance between boxes' centers squared. + # The distance IoU is the IoU penalized by a normalized + # distance between boxes' centers squared. diou = iou - (centers_distance_squared / diagonal_distance_squared) loss = 1 - diou if reduction == "mean": @@ -81,4 +81,4 @@ def distance_box_iou_loss( elif reduction == "sum": loss = loss.sum() - return loss \ No newline at end of file + return loss From 8ceffcc38bd463e16281b8e635bb4aa4772f0015 Mon Sep 17 00:00:00 2001 From: Yassine Alouini Date: Wed, 13 Apr 2022 17:43:40 +0200 Subject: [PATCH 06/16] [ENH] Pass the device in the assertion for the dIoU loss test. --- test/test_ops.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index e2f5235a5bd..f7ab4332bfe 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1301,9 +1301,9 @@ class TestDistanceIoULoss: # https://github.com/pytorch/vision/pull/5776/files#diff-d183f2afc51d6a59bc70094e8f476d2468c45e415500f6eb60abad955e065156 @staticmethod - def assert_distance_iou_loss(box1, box2, expected_output, dtype, reduction="none"): + def assert_distance_iou_loss(box1, box2, expected_output, dtype, device, reduction="none"): output = ops.distance_box_iou_loss(box1, box2, reduction=reduction) - expected_output = torch.tensor(expected_output, dtype=dtype) + expected_output = torch.tensor(expected_output, dtype=dtype, device=device) tol = 1e-5 if dtype != torch.half else 1e-3 torch.testing.assert_close(output, expected_output, rtol=tol, atol=tol) @@ -1325,16 +1325,16 @@ def test_distance_iou_loss(self, dtype, device): dim=0, ) - self.assert_distance_iou_loss(box1, box1, 0.0, dtype) + self.assert_distance_iou_loss(box1, box1, 0.0, dtype, device) - self.assert_distance_iou_loss(box1, box2, 0.8125, dtype) + self.assert_distance_iou_loss(box1, box2, 0.8125, dtype, device) - self.assert_distance_iou_loss(box1, box3, 1.1923, dtype) + self.assert_distance_iou_loss(box1, box3, 1.1923, dtype, device) - self.assert_distance_iou_loss(box1, box4, 1.2500, dtype) + self.assert_distance_iou_loss(box1, box4, 1.2500, dtype, device) - self.assert_distance_iou_loss(box1s, box2s, 1.2250, dtype, reduction="mean") - self.assert_distance_iou_loss(box1s, box2s, 2.4500, dtype, reduction="sum") + self.assert_distance_iou_loss(box1s, box2s, 1.2250, dtype, device, reduction="mean") + self.assert_distance_iou_loss(box1s, box2s, 2.4500, dtype, device, reduction="sum") class TestMasksToBoxes: From a4e58b7f652e9cda5d10a3ab18086124c4954ca3 Mon Sep 17 00:00:00 2001 From: Yassine Alouini Date: Thu, 14 Apr 2022 11:51:06 +0200 Subject: [PATCH 07/16] [FIX] Remove type hints from the dIoU box test. --- test/test_ops.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index f7ab4332bfe..d8a3c8189ba 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1259,24 +1259,24 @@ def test_giou_jit(self) -> None: class TestDistanceBoxIoU(BoxTestBase): - def _target_fn(self) -> Tuple[bool, Callable]: + def _target_fn(self): return (True, ops.distance_box_iou) - def _generate_int_input() -> List[List[int]]: + def _generate_int_input(): return [[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]] # TODO: Update this. - def _generate_int_expected() -> List[List[float]]: + def _generate_int_expected(): return [[1.0, 0.25, 0.0], [0.25, 1.0, 0.0], [0.0, 0.0, 1.0]] - def _generate_float_input() -> List[List[float]]: + def _generate_float_input(): return [ [285.3538, 185.5758, 1193.5110, 851.4551], [285.1472, 188.7374, 1192.4984, 851.0669], [279.2440, 197.9812, 1189.4746, 849.2019], ] - def _generate_float_expected() -> List[List[float]]: + def _generate_float_expected(): return [[1.0, 0.9933, 0.9673], [0.9933, 1.0, 0.9737], [0.9673, 0.9737, 1.0]] @pytest.mark.parametrize( @@ -1289,10 +1289,10 @@ def _generate_float_expected() -> List[List[float]]: pytest.param(_generate_float_input(), [torch.float32, torch.float64], 0.001, _generate_float_expected()), ], ) - def test_distance_iou(self, test_input: List, dtypes: List[torch.dtype], tolerance: float, expected: List) -> None: + def test_distance_iou(self, test_input, dtypes, tolerance, expected): self._run_test(test_input, dtypes, tolerance, expected) - def test_distance_iou_jit(self) -> None: + def test_distance_iou_jit(self): self._run_jit_test([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]]) @@ -1304,10 +1304,13 @@ class TestDistanceIoULoss: def assert_distance_iou_loss(box1, box2, expected_output, dtype, device, reduction="none"): output = ops.distance_box_iou_loss(box1, box2, reduction=reduction) expected_output = torch.tensor(expected_output, dtype=dtype, device=device) + print("I am the output dtype", output.dtype) + print("I am the expected dtype", expected_output.dtype) tol = 1e-5 if dtype != torch.half else 1e-3 torch.testing.assert_close(output, expected_output, rtol=tol, atol=tol) - # TODO: torch.half as a dtype doesn't pass the test, investigate... + # TODO: torch.half as a dtype doesn't pass the test. + # In fact, probably there is an ops that autocasts to float32. @pytest.mark.parametrize("dtype", [torch.float32]) @pytest.mark.parametrize("device", cpu_and_gpu()) def test_distance_iou_loss(self, dtype, device): From 0ead2c3aecc072e093082e39c79eb9113e43f053 Mon Sep 17 00:00:00 2001 From: Yassine Alouini Date: Thu, 21 Apr 2022 15:14:21 +0200 Subject: [PATCH 08/16] [ENH] Refactor box and loss for dIoU functions + fix half tests. --- test/test_ops.py | 15 +++------ torchvision/ops/boxes.py | 8 ++--- torchvision/ops/diou_loss.py | 61 ++++++++---------------------------- 3 files changed, 22 insertions(+), 62 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 3f778aad8a1..cc080b72394 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1265,7 +1265,6 @@ def _target_fn(self): def _generate_int_input(): return [[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]] - # TODO: Update this. def _generate_int_expected(): return [[1.0, 0.25, 0.0], [0.25, 1.0, 0.0], [0.0, 0.0, 1.0]] @@ -1304,14 +1303,10 @@ class TestDistanceIoULoss: def assert_distance_iou_loss(box1, box2, expected_output, dtype, device, reduction="none"): output = ops.distance_box_iou_loss(box1, box2, reduction=reduction) expected_output = torch.tensor(expected_output, dtype=dtype, device=device) - print("I am the output dtype", output.dtype) - print("I am the expected dtype", expected_output.dtype) tol = 1e-5 if dtype != torch.half else 1e-3 torch.testing.assert_close(output, expected_output, rtol=tol, atol=tol) - # TODO: torch.half as a dtype doesn't pass the test. - # In fact, probably there is an ops that autocasts to float32. - @pytest.mark.parametrize("dtype", [torch.float32]) + @pytest.mark.parametrize("dtype", [torch.float32, torch.half]) @pytest.mark.parametrize("device", cpu_and_gpu()) def test_distance_iou_loss(self, dtype, device): box1 = torch.tensor([-1, -1, 1, 1], dtype=dtype, device=device) @@ -1330,14 +1325,14 @@ def test_distance_iou_loss(self, dtype, device): self.assert_distance_iou_loss(box1, box1, 0.0, dtype, device) - self.assert_distance_iou_loss(box1, box2, 0.8125, dtype, device) + self.assert_distance_iou_loss(box1, box2, 0.8750, dtype, device) self.assert_distance_iou_loss(box1, box3, 1.1923, dtype, device) - self.assert_distance_iou_loss(box1, box4, 1.2500, dtype, device) + self.assert_distance_iou_loss(box1, box4, 1.2778, dtype, device) - self.assert_distance_iou_loss(box1s, box2s, 1.2250, dtype, device, reduction="mean") - self.assert_distance_iou_loss(box1s, box2s, 2.4500, dtype, device, reduction="sum") + self.assert_distance_iou_loss(box1s, box2s, 1.9000, dtype, device, reduction="mean") + self.assert_distance_iou_loss(box1s, box2s, 3.8000, dtype, device, reduction="sum") class TestMasksToBoxes: diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py index 3ac3a7f3233..72a3cae5fba 100644 --- a/torchvision/ops/boxes.py +++ b/torchvision/ops/boxes.py @@ -336,8 +336,8 @@ def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tenso inter, union = _box_inter_union(boxes1, boxes2) iou = inter / union - lti = torch.min(boxes1[:, None, :2], boxes2[:, :2]) - rbi = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) + lti = torch.min(boxes1[:, None, :2], boxes2[:, None, :2]) + rbi = torch.max(boxes1[:, None, 2:], boxes2[:, None, 2:]) whi = _upcast(rbi - lti).clamp(min=0) # [N,M,2] diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps @@ -345,8 +345,8 @@ def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tenso # centers of boxes x_p = boxes1[:, None, :2].sum() / 2 y_p = boxes1[:, None, 2:].sum() / 2 - x_g = boxes2[:, :2].sum() / 2 - y_g = boxes2[:, 2:].sum() / 2 + x_g = boxes2[:, None, :2].sum() / 2 + y_g = boxes2[:, None, 2:].sum() / 2 # The distance between boxes' centers squared. centers_distance_squared = (_upcast(x_p - x_g) ** 2) + (_upcast(y_p - y_g) ** 2) diff --git a/torchvision/ops/diou_loss.py b/torchvision/ops/diou_loss.py index 766c8ac6f03..71e8aa03137 100644 --- a/torchvision/ops/diou_loss.py +++ b/torchvision/ops/diou_loss.py @@ -1,10 +1,6 @@ +from .boxes import distance_box_iou import torch -from ..utils import _log_api_usage_once -from .boxes import _upcast - - -# TODO: Some parts can be refactored between gIoU, cIoU, and dIoU. def distance_box_iou_loss( boxes1: torch.Tensor, boxes2: torch.Tensor, @@ -29,56 +25,25 @@ def distance_box_iou_loss( applied to the output. ``'mean'``: The output will be averaged. ``'sum'``: The output will be summed. Default: ``'none'`` eps (float, optional): small number to prevent division by zero. Default: 1e-7 + + Returns: + Tensor[]: Loss tensor with the reduction option applied. + Reference: Zhaohui Zheng et. al: Distance Intersection over Union Loss: https://arxiv.org/abs/1911.08287 - """ - # Original implementation from: - # https://github.com/facebookresearch/detectron2/blob/dfe8d368c8b7cc2be42c5c3faf9bdcc3c08257b1/detectron2/layers/losses.py#L5 - if not torch.jit.is_scripting() and not torch.jit.is_tracing(): - _log_api_usage_once(distance_box_iou_loss) - - boxes1 = _upcast(boxes1) - boxes2 = _upcast(boxes2) - x1, y1, x2, y2 = boxes1.unbind(dim=-1) - x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) - - # Intersection keypoints - xkis1 = torch.max(x1, x1g) - ykis1 = torch.max(y1, y1g) - xkis2 = torch.min(x2, x2g) - ykis2 = torch.min(y2, y2g) - - intsct = torch.zeros_like(x1) - mask = (ykis2 > ykis1) & (xkis2 > xkis1) - intsct[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask]) - union = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsct + eps - iou = intsct / union - - # smallest enclosing box - xc1 = torch.min(x1, x1g) - yc1 = torch.min(y1, y1g) - xc2 = torch.max(x2, x2g) - yc2 = torch.max(y2, y2g) - # The diagonal distance of the smallest enclosing box squared - diagonal_distance_squared = ((xc2 - xc1) ** 2) + ((yc2 - yc1) ** 2) + eps - - # centers of boxes - x_p = (x2 + x1) / 2 - y_p = (y2 + y1) / 2 - x_g = (x1g + x2g) / 2 - y_g = (y1g + y2g) / 2 - # The distance between boxes' centers squared. - centers_distance_squared = ((x_p - x_g) ** 2) + ((y_p - y_g) ** 2) - - # The distance IoU is the IoU penalized by a normalized - # distance between boxes' centers squared. - diou = iou - (centers_distance_squared / diagonal_distance_squared) + if boxes1.dim() == 1 and boxes2.dim() == 1: + batch_boxes1 = boxes1.unsqueeze(0) + batch_boxes2 = boxes2.unsqueeze(0) + diou = distance_box_iou(batch_boxes1, batch_boxes2, eps)[0, 0] + else: + diou = distance_box_iou(boxes1, boxes2, eps)[0] loss = 1 - diou if reduction == "mean": loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum() elif reduction == "sum": loss = loss.sum() - + # Cast the loss to the same dtype as the input boxes + loss = loss.to(boxes1.dtype) return loss From a2702f83b1f8b5f5099098601dd0af53d77ba04a Mon Sep 17 00:00:00 2001 From: Yassine Alouini Date: Thu, 21 Apr 2022 15:19:29 +0200 Subject: [PATCH 09/16] =?UTF-8?q?[FIX]=C2=A0Precommits=20fix.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- torchvision/ops/diou_loss.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/torchvision/ops/diou_loss.py b/torchvision/ops/diou_loss.py index 71e8aa03137..a66d529c9b2 100644 --- a/torchvision/ops/diou_loss.py +++ b/torchvision/ops/diou_loss.py @@ -1,6 +1,8 @@ -from .boxes import distance_box_iou import torch +from .boxes import distance_box_iou + + def distance_box_iou_loss( boxes1: torch.Tensor, boxes2: torch.Tensor, @@ -44,6 +46,6 @@ def distance_box_iou_loss( loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum() elif reduction == "sum": loss = loss.sum() - # Cast the loss to the same dtype as the input boxes + # Cast the loss to the same dtype as the input boxes loss = loss.to(boxes1.dtype) return loss From 497a7c1b2d4ccebd0eb1372ccdb37283687f4880 Mon Sep 17 00:00:00 2001 From: Yassine Alouini Date: Tue, 26 Apr 2022 11:43:19 +0200 Subject: [PATCH 10/16] [ENH] Some improvement for the distance IoU tests thanks to code review. --- test/test_ops.py | 50 ++++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index cc080b72394..b93c9df5b35 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1295,44 +1295,40 @@ def test_distance_iou_jit(self): self._run_jit_test([[0, 0, 100, 100], [0, 0, 50, 50], [200, 200, 300, 300]]) -class TestDistanceIoULoss: - # Inspired and adapted from: - # https://github.com/pytorch/vision/pull/5776/files#diff-d183f2afc51d6a59bc70094e8f476d2468c45e415500f6eb60abad955e065156 +@pytest.mark.parametrize("dtype", [torch.float32, torch.half]) +@pytest.mark.parametrize("device", cpu_and_gpu()) +def test_distance_iou_loss(dtype, device): + box1 = torch.tensor([-1, -1, 1, 1], dtype=dtype, device=device) + box2 = torch.tensor([0, 0, 1, 1], dtype=dtype, device=device) + box3 = torch.tensor([0, 1, 1, 2], dtype=dtype, device=device) + box4 = torch.tensor([1, 1, 2, 2], dtype=dtype, device=device) + + box1s = torch.stack( + [box2, box2], + dim=0, + ) + box2s = torch.stack( + [box3, box4], + dim=0, + ) - @staticmethod - def assert_distance_iou_loss(box1, box2, expected_output, dtype, device, reduction="none"): + def assert_distance_iou_loss(box1, box2, expected_output, reduction="none"): output = ops.distance_box_iou_loss(box1, box2, reduction=reduction) expected_output = torch.tensor(expected_output, dtype=dtype, device=device) tol = 1e-5 if dtype != torch.half else 1e-3 torch.testing.assert_close(output, expected_output, rtol=tol, atol=tol) - @pytest.mark.parametrize("dtype", [torch.float32, torch.half]) - @pytest.mark.parametrize("device", cpu_and_gpu()) - def test_distance_iou_loss(self, dtype, device): - box1 = torch.tensor([-1, -1, 1, 1], dtype=dtype, device=device) - box2 = torch.tensor([0, 0, 1, 1], dtype=dtype, device=device) - box3 = torch.tensor([0, 1, 1, 2], dtype=dtype, device=device) - box4 = torch.tensor([1, 1, 2, 2], dtype=dtype, device=device) - - box1s = torch.stack( - [box2, box2], - dim=0, - ) - box2s = torch.stack( - [box3, box4], - dim=0, - ) - self.assert_distance_iou_loss(box1, box1, 0.0, dtype, device) + assert_distance_iou_loss(box1, box1, 0.0) - self.assert_distance_iou_loss(box1, box2, 0.8750, dtype, device) + assert_distance_iou_loss(box1, box2, 0.8750) - self.assert_distance_iou_loss(box1, box3, 1.1923, dtype, device) + assert_distance_iou_loss(box1, box3, 1.1923) - self.assert_distance_iou_loss(box1, box4, 1.2778, dtype, device) + assert_distance_iou_loss(box1, box4, 1.2778) - self.assert_distance_iou_loss(box1s, box2s, 1.9000, dtype, device, reduction="mean") - self.assert_distance_iou_loss(box1s, box2s, 3.8000, dtype, device, reduction="sum") + assert_distance_iou_loss(box1s, box2s, 1.9000, reduction="mean") + assert_distance_iou_loss(box1s, box2s, 3.8000, reduction="sum") class TestMasksToBoxes: From a054032806784088239e4bfcce7e83d635ef4649 Mon Sep 17 00:00:00 2001 From: Yassine Alouini Date: Tue, 26 Apr 2022 12:10:23 +0200 Subject: [PATCH 11/16] [ENH] Upcast in distance boxes computation to avoid overflow. --- test/test_ops.py | 1 - torchvision/ops/boxes.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index b93c9df5b35..099ed711d47 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1318,7 +1318,6 @@ def assert_distance_iou_loss(box1, box2, expected_output, reduction="none"): tol = 1e-5 if dtype != torch.half else 1e-3 torch.testing.assert_close(output, expected_output, rtol=tol, atol=tol) - assert_distance_iou_loss(box1, box1, 0.0) assert_distance_iou_loss(box1, box2, 0.8750) diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py index 72a3cae5fba..575506c5bb3 100644 --- a/torchvision/ops/boxes.py +++ b/torchvision/ops/boxes.py @@ -311,9 +311,6 @@ def generalized_box_iou(boxes1: Tensor, boxes2: Tensor) -> Tensor: return iou - (areai - union) / areai -# Implementation inspired from the generalized_box_iou one. -# TODO: Some refactoring and homogenization could be done with -# the loss function in diou_loss. def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tensor: """ Return distance intersection-over-union (Jaccard index) between two sets of boxes. @@ -333,6 +330,9 @@ def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tenso if not torch.jit.is_scripting() and not torch.jit.is_tracing(): _log_api_usage_once(distance_box_iou) + boxes1 = _upcast(boxes1) + boxes2 = _upcast(boxes2) + inter, union = _box_inter_union(boxes1, boxes2) iou = inter / union From d7baa673e38ed557eee1355fe0fd08b5e9b00528 Mon Sep 17 00:00:00 2001 From: Yassine Alouini Date: Tue, 26 Apr 2022 15:11:52 +0200 Subject: [PATCH 12/16] [ENH] Revert the refactor of distance IoU loss back since it introduced a bug and can be slow. --- test/test_ops.py | 20 ++++++------- torchvision/ops/boxes.py | 8 ++--- torchvision/ops/diou_loss.py | 57 ++++++++++++++++++++++++++++-------- 3 files changed, 59 insertions(+), 26 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 099ed711d47..953421123e3 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1298,10 +1298,10 @@ def test_distance_iou_jit(self): @pytest.mark.parametrize("dtype", [torch.float32, torch.half]) @pytest.mark.parametrize("device", cpu_and_gpu()) def test_distance_iou_loss(dtype, device): - box1 = torch.tensor([-1, -1, 1, 1], dtype=dtype, device=device) - box2 = torch.tensor([0, 0, 1, 1], dtype=dtype, device=device) - box3 = torch.tensor([0, 1, 1, 2], dtype=dtype, device=device) - box4 = torch.tensor([1, 1, 2, 2], dtype=dtype, device=device) + box1 = torch.tensor([[-1, -1, 1, 1]], dtype=dtype, device=device) + box2 = torch.tensor([[0, 0, 1, 1]], dtype=dtype, device=device) + box3 = torch.tensor([[0, 1, 1, 2]], dtype=dtype, device=device) + box4 = torch.tensor([[1, 1, 2, 2]], dtype=dtype, device=device) box1s = torch.stack( [box2, box2], @@ -1318,16 +1318,16 @@ def assert_distance_iou_loss(box1, box2, expected_output, reduction="none"): tol = 1e-5 if dtype != torch.half else 1e-3 torch.testing.assert_close(output, expected_output, rtol=tol, atol=tol) - assert_distance_iou_loss(box1, box1, 0.0) + assert_distance_iou_loss(box1, box1, [0.0]) - assert_distance_iou_loss(box1, box2, 0.8750) + assert_distance_iou_loss(box1, box2, [0.8125]) - assert_distance_iou_loss(box1, box3, 1.1923) + assert_distance_iou_loss(box1, box3, [1.1923]) - assert_distance_iou_loss(box1, box4, 1.2778) + assert_distance_iou_loss(box1, box4, [1.2500]) - assert_distance_iou_loss(box1s, box2s, 1.9000, reduction="mean") - assert_distance_iou_loss(box1s, box2s, 3.8000, reduction="sum") + assert_distance_iou_loss(box1s, box2s, 1.2250, reduction="mean") + assert_distance_iou_loss(box1s, box2s, 2.4500, reduction="sum") class TestMasksToBoxes: diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py index 575506c5bb3..f2d58661d21 100644 --- a/torchvision/ops/boxes.py +++ b/torchvision/ops/boxes.py @@ -336,8 +336,8 @@ def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tenso inter, union = _box_inter_union(boxes1, boxes2) iou = inter / union - lti = torch.min(boxes1[:, None, :2], boxes2[:, None, :2]) - rbi = torch.max(boxes1[:, None, 2:], boxes2[:, None, 2:]) + lti = torch.min(boxes1[:, None, :2], boxes2[:, :2]) + rbi = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) whi = _upcast(rbi - lti).clamp(min=0) # [N,M,2] diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps @@ -345,8 +345,8 @@ def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tenso # centers of boxes x_p = boxes1[:, None, :2].sum() / 2 y_p = boxes1[:, None, 2:].sum() / 2 - x_g = boxes2[:, None, :2].sum() / 2 - y_g = boxes2[:, None, 2:].sum() / 2 + x_g = boxes2[:, :2].sum() / 2 + y_g = boxes2[:, 2:].sum() / 2 # The distance between boxes' centers squared. centers_distance_squared = (_upcast(x_p - x_g) ** 2) + (_upcast(y_p - y_g) ** 2) diff --git a/torchvision/ops/diou_loss.py b/torchvision/ops/diou_loss.py index a66d529c9b2..355e29d39f8 100644 --- a/torchvision/ops/diou_loss.py +++ b/torchvision/ops/diou_loss.py @@ -1,6 +1,6 @@ import torch - -from .boxes import distance_box_iou +from ..utils import _log_api_usage_once +from .boxes import _upcast def distance_box_iou_loss( @@ -20,8 +20,8 @@ def distance_box_iou_loss( same dimensions. Args: - boxes1 (Tensor[N, 4] or Tensor[4]): first set of boxes - boxes2 (Tensor[N, 4] or Tensor[4]): second set of boxes + boxes1 (Tensor[N, 4]): first set of boxes + boxes2 (Tensor[N, 4]): second set of boxes reduction (string, optional): Specifies the reduction to apply to the output: ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: No reduction will be applied to the output. ``'mean'``: The output will be averaged. @@ -35,17 +35,50 @@ def distance_box_iou_loss( Zhaohui Zheng et. al: Distance Intersection over Union Loss: https://arxiv.org/abs/1911.08287 """ - if boxes1.dim() == 1 and boxes2.dim() == 1: - batch_boxes1 = boxes1.unsqueeze(0) - batch_boxes2 = boxes2.unsqueeze(0) - diou = distance_box_iou(batch_boxes1, batch_boxes2, eps)[0, 0] - else: - diou = distance_box_iou(boxes1, boxes2, eps)[0] + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): + _log_api_usage_once(distance_box_iou_loss) + + # TODO: Removing the _upcast call makes the torch.half tests in test_ops pass + # but we might get overflow problems... How to fix without casting at the end? + # boxes1 = _upcast(boxes1) + # boxes2 = _upcast(boxes2) + x1, y1, x2, y2 = boxes1.unbind(dim=-1) + x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1) + + # Intersection keypoints + xkis1 = torch.max(x1, x1g) + ykis1 = torch.max(y1, y1g) + xkis2 = torch.min(x2, x2g) + ykis2 = torch.min(y2, y2g) + + intsct = torch.zeros_like(x1) + mask = (ykis2 > ykis1) & (xkis2 > xkis1) + intsct[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask]) + union = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsct + eps + iou = intsct / union + + # smallest enclosing box + xc1 = torch.min(x1, x1g) + yc1 = torch.min(y1, y1g) + xc2 = torch.max(x2, x2g) + yc2 = torch.max(y2, y2g) + # The diagonal distance of the smallest enclosing box squared + diagonal_distance_squared = ((xc2 - xc1) ** 2) + ((yc2 - yc1) ** 2) + eps + + # centers of boxes + x_p = (x2 + x1) / 2 + y_p = (y2 + y1) / 2 + x_g = (x1g + x2g) / 2 + y_g = (y1g + y2g) / 2 + # The distance between boxes' centers squared. + centers_distance_squared = ((x_p - x_g) ** 2) + ((y_p - y_g) ** 2) + + # The distance IoU is the IoU penalized by a normalized + # distance between boxes' centers squared. + diou = iou - (centers_distance_squared / diagonal_distance_squared) loss = 1 - diou if reduction == "mean": loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum() elif reduction == "sum": loss = loss.sum() - # Cast the loss to the same dtype as the input boxes - loss = loss.to(boxes1.dtype) return loss From 4213ee4ab02bdcfe0304ab44a460837d52578a91 Mon Sep 17 00:00:00 2001 From: Yassine Alouini Date: Tue, 26 Apr 2022 15:52:54 +0200 Subject: [PATCH 13/16] Precommit fix. --- torchvision/ops/diou_loss.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torchvision/ops/diou_loss.py b/torchvision/ops/diou_loss.py index 355e29d39f8..62cbf6f61b5 100644 --- a/torchvision/ops/diou_loss.py +++ b/torchvision/ops/diou_loss.py @@ -1,4 +1,5 @@ import torch + from ..utils import _log_api_usage_once from .boxes import _upcast @@ -39,7 +40,7 @@ def distance_box_iou_loss( _log_api_usage_once(distance_box_iou_loss) # TODO: Removing the _upcast call makes the torch.half tests in test_ops pass - # but we might get overflow problems... How to fix without casting at the end? + # but we might get overflow problems... How to fix without casting at the end? # boxes1 = _upcast(boxes1) # boxes2 = _upcast(boxes2) x1, y1, x2, y2 = boxes1.unbind(dim=-1) From 2856947d5a91cecd7a9e176a5b0431ff864c94bc Mon Sep 17 00:00:00 2001 From: Yassine Alouini Date: Mon, 2 May 2022 11:10:05 +0200 Subject: [PATCH 14/16] [FIX] Few changes introduced by merge conflict. --- test/test_ops.py | 3 +++ torchvision/ops/boxes.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 3f4ca90137e..dd11084ca97 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1281,6 +1281,9 @@ def _generate_float_expected(): @pytest.mark.parametrize( "test_input, dtypes, tolerance, expected", [ + pytest.param( + _generate_int_input(), [torch.int16, torch.int32, torch.int64], 1e-4, _generate_int_expected() + ), pytest.param(_generate_float_input(), [torch.float16], 0.002, _generate_float_expected()), pytest.param(_generate_float_input(), [torch.float32, torch.float64], 0.001, _generate_float_expected()), ], diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py index 83d49932d65..3b994879ecf 100644 --- a/torchvision/ops/boxes.py +++ b/torchvision/ops/boxes.py @@ -372,11 +372,11 @@ def distance_box_iou(boxes1: Tensor, boxes2: Tensor, eps: float = 1e-7) -> Tenso eps (float, optional): small number to prevent division by zero. Default: 1e-7 Returns: - Tensor[N, M]: the NxM matrix containing the pairwise complete IoU values + Tensor[N, M]: the NxM matrix containing the pairwise distance IoU values for every element in boxes1 and boxes2 """ if not torch.jit.is_scripting() and not torch.jit.is_tracing(): - _log_api_usage_once(complete_box_iou) + _log_api_usage_once(distance_box_iou) boxes1 = _upcast(boxes1) boxes2 = _upcast(boxes2) From 3a9d3d7799e2502fe315bd987a614bcb88d3781f Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 9 May 2022 12:43:26 +0100 Subject: [PATCH 15/16] Add code reference --- torchvision/ops/diou_loss.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/torchvision/ops/diou_loss.py b/torchvision/ops/diou_loss.py index 08420464a6c..ea7ead19344 100644 --- a/torchvision/ops/diou_loss.py +++ b/torchvision/ops/diou_loss.py @@ -30,12 +30,15 @@ def distance_box_iou_loss( eps (float, optional): small number to prevent division by zero. Default: 1e-7 Returns: - Tensor[]: Loss tensor with the reduction option applied. + Tensor: Loss tensor with the reduction option applied. Reference: Zhaohui Zheng et. al: Distance Intersection over Union Loss: https://arxiv.org/abs/1911.08287 """ + + # Original Implementation : https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/losses.py + if not torch.jit.is_scripting() and not torch.jit.is_tracing(): _log_api_usage_once(distance_box_iou_loss) From 1b2f1e6803b1ddbaac20f8667801b8020f4d26dc Mon Sep 17 00:00:00 2001 From: Vasilis Vryniotis Date: Mon, 9 May 2022 14:20:10 +0100 Subject: [PATCH 16/16] Fix test --- test/test_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_ops.py b/test/test_ops.py index dd11084ca97..96cfb630e8d 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1341,7 +1341,7 @@ def test_empty_distance_iou_inputs(dtype, device) -> None: loss.backward() tol = 1e-3 if dtype is torch.half else 1e-5 - torch.testing.assert_close(loss, torch.tensor(0.0), rtol=tol, atol=tol) + torch.testing.assert_close(loss, torch.tensor(0.0, device=device), rtol=tol, atol=tol) assert box1.grad is not None, "box1.grad should not be None after backward is called" assert box2.grad is not None, "box2.grad should not be None after backward is called"