Skip to content

Add stereo preset transforms #6549

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 35 commits into from
Sep 22, 2022
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
9de8cc9
Added transforms for Stereo Matching
TeodorPoncu Sep 8, 2022
376b81b
changed implicit Y scaling to 0.
TeodorPoncu Sep 8, 2022
ec0c3b1
Addressed some comments
TeodorPoncu Sep 9, 2022
f5cec5a
addressed type hint
TeodorPoncu Sep 9, 2022
bce5e7d
Added interpolation random interpolation strategy
TeodorPoncu Sep 9, 2022
efc1087
Aligned crop get params
TeodorPoncu Sep 9, 2022
28c89ad
Merge branch 'main' into add-stereo-preset-transforms
TeodorPoncu Sep 9, 2022
bf93fc0
fixed bug in RandomErase
TeodorPoncu Sep 13, 2022
f67a21e
Addressed scaling and typos
TeodorPoncu Sep 13, 2022
3a80766
Addressed occlusion typo
TeodorPoncu Sep 13, 2022
dad77fb
Changed parameter order in F.erase
TeodorPoncu Sep 13, 2022
1bc61a4
fixed random erase
TeodorPoncu Sep 13, 2022
279ec0f
Added inference preset transform for stereo matching
TeodorPoncu Sep 13, 2022
98de4a4
added contiguous reshape to output tensors
TeodorPoncu Sep 13, 2022
ab0d54c
Addressed comments
TeodorPoncu Sep 14, 2022
a9c3682
Modified the transform preset to use Tuple[int, int]
TeodorPoncu Sep 14, 2022
a99ebb5
addressed NITs
TeodorPoncu Sep 15, 2022
bb6632d
added grayscale transform, align resize -> mask
TeodorPoncu Sep 15, 2022
5570313
changed max disparity default behaviour
TeodorPoncu Sep 16, 2022
df3461d
added fixed resize, changed masking in sparse flow masking
TeodorPoncu Sep 16, 2022
e294d92
Merge branch 'main' into add-stereo-preset-transforms
TeodorPoncu Sep 16, 2022
568544c
update to align with argparse
TeodorPoncu Sep 17, 2022
11154ea
Merge branch 'add-stereo-preset-transforms' of https://github.com/Teo…
TeodorPoncu Sep 17, 2022
5683601
changed default mask in asymmetric pairs
TeodorPoncu Sep 17, 2022
f5a4423
moved grayscale order
TeodorPoncu Sep 18, 2022
c121c2f
changed grayscale api to accept to tensor variant
TeodorPoncu Sep 18, 2022
19a43c6
mypy fix
TeodorPoncu Sep 19, 2022
c55e555
changed resize specs
TeodorPoncu Sep 20, 2022
d9a726e
addressed nits
TeodorPoncu Sep 20, 2022
655cb18
Merge branch 'main' into add-stereo-preset-transforms
jdsgomes Sep 21, 2022
872703b
added type hints
TeodorPoncu Sep 21, 2022
e3a3e0f
mypy fix
TeodorPoncu Sep 21, 2022
6e04c24
mypy fix
TeodorPoncu Sep 21, 2022
d1fe636
mypy fix
TeodorPoncu Sep 21, 2022
c7f0f74
Merge branch 'main' into add-stereo-preset-transforms
TeodorPoncu Sep 21, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions references/depth/stereo/presets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from typing import Optional, Tuple, Union

import torch
import transforms as T


class StereoMatchingEvalPreset(torch.nn.Module):
    """Evaluation-time transform pipeline for stereo matching.

    Converts image pairs to float tensors, optionally resizes them, and
    normalizes them before handing them to the model.

    Args:
        mean: Per-channel mean used by ``T.Normalize``.
        std: Per-channel std used by ``T.Normalize``.
        resize_size: Optional ``(height, width)`` to resize inputs to.
            When ``None``, no resizing is performed.
        interpolation_type: Interpolation mode forwarded to ``T.Resize``.
    """

    def __init__(
        self,
        mean: float = 0.5,
        std: float = 0.5,
        resize_size: Optional[Tuple[int, int]] = None,
        interpolation_type: str = "bilinear",
    ) -> None:
        super().__init__()

        transforms = [
            T.ToTensor(),
            T.MakeValidDisparityMask(512),  # we keep this transform for API consistency
            T.ConvertImageDtype(torch.float32),
        ]

        # Resize must run before normalization and validation so that
        # ValidateModelInput checks the tensors actually fed to the model
        # (previously Resize was appended after ValidateModelInput).
        if resize_size is not None:
            transforms.append(T.Resize(resize_size, interpolation_type=interpolation_type))

        transforms += [
            T.Normalize(mean=mean, std=std),
            T.ValidateModelInput(),
        ]

        self.transforms = T.Compose(transforms)

    def forward(self, images, disparities, masks):
        """Apply the composed eval transforms to a stereo sample."""
        return self.transforms(images, disparities, masks)


class StereoMatchingTrainPreset(torch.nn.Module):
    """Training-time augmentation pipeline for stereo matching.

    Applies (in order): color/gamma jitter, random spatial shift, dtype
    conversion, random resize-and-crop, horizontal flip, occlusion and
    erase augmentations, normalization, disparity masking, and a final
    input validation step.

    Args:
        crop_size: Output ``(height, width)`` of ``RandomResizeAndCrop``.
        resize_prob: Probability of applying the random resize.
        scaling_type: Either ``"linear"`` or ``"exponential"`` scale sampling.
        scale_range: Range the resize scale is sampled from.
        scale_interpolation_type: Interpolation mode for the resize.
        mean: Per-channel mean used by ``T.Normalize``.
        std: Per-channel std used by ``T.Normalize``.
        gpu_transforms: If ``True``, move tensors to GPU before augmenting.
        max_disparity: Disparities above this value are masked out.
        spatial_shift_prob: Probability of applying the random spatial shift.
        spatial_shift_max_angle: Max rotation angle for the spatial shift.
        spatial_shift_max_displacement: Max translation for the spatial shift.
        spatial_shift_interpolation_type: Interpolation mode for the shift.
        gamma_range: Range for the asymmetric gamma adjustment.
        brightness: Brightness jitter amount or range.
        contrast: Contrast jitter amount or range.
        saturation: Saturation jitter amount or range.
        hue: Hue jitter amount or range.
        asymmetric_jitter_prob: Probability the jitter is applied asymmetrically.
        horizontal_flip_prob: Probability of flipping the stereo pair.
        occlusion_prob: Probability of applying ``RandomOcclusion``.
        occlusion_px_range: Pixel-size range of the occluded patch.
        erase_prob: Probability of applying ``RandomErase``.
        erase_px_range: Pixel-size range of the erased patch.
        erase_num_repeats: How many erase patches to apply.

    Raises:
        ValueError: If ``scaling_type`` is not ``"linear"`` or ``"exponential"``.
    """

    def __init__(
        self,
        *,
        # RandomResizeAndCrop params
        crop_size: Tuple[int, int],
        resize_prob: float = 1.0,
        scaling_type: str = "exponential",
        scale_range: Tuple[float, float] = (-0.2, 0.5),
        scale_interpolation_type: str = "bilinear",
        # normalization params:
        mean: float = 0.5,
        std: float = 0.5,
        # processing device
        gpu_transforms: bool = False,
        # masking
        max_disparity: int = 256,
        # SpatialShift params
        spatial_shift_prob: float = 0.5,
        spatial_shift_max_angle: float = 0.5,
        spatial_shift_max_displacement: float = 0.5,
        spatial_shift_interpolation_type: str = "bilinear",
        # AssymetricColorJitter
        gamma_range: Tuple[float, float] = (0.8, 1.2),
        brightness: Union[int, Tuple[int, int]] = (0.8, 1.2),
        contrast: Union[int, Tuple[int, int]] = (0.8, 1.2),
        saturation: Union[int, Tuple[int, int]] = 0.0,
        hue: Union[int, Tuple[int, int]] = 0.0,
        asymmetric_jitter_prob: float = 1.0,
        # RandomHorizontalFlip
        horizontal_flip_prob: float = 0.5,
        # RandomOcclusion
        occlusion_prob: float = 0.0,
        occlusion_px_range: Tuple[int, int] = (50, 100),
        # RandomErase
        erase_prob: float = 0.0,
        erase_px_range: Tuple[int, int] = (50, 100),
        erase_num_repeats: int = 1,
    ) -> None:

        # Validate before any setup so a bad config fails fast.
        if scaling_type not in ["linear", "exponential"]:
            raise ValueError(f"Unknown scaling type: {scaling_type}. Available types: linear, exponential")

        super().__init__()
        transforms = [T.ToTensor()]
        if gpu_transforms:
            transforms.append(T.ToGPU())

        transforms.extend(
            [
                T.AsymmetricColorJitter(
                    brightness=brightness, contrast=contrast, saturation=saturation, hue=hue, p=asymmetric_jitter_prob
                ),
                T.AsymetricGammaAdjust(p=asymmetric_jitter_prob, gamma_range=gamma_range),
                T.RandomSpatialShift(
                    p=spatial_shift_prob,
                    max_angle=spatial_shift_max_angle,
                    max_displacement=spatial_shift_max_displacement,
                    interpolation_type=spatial_shift_interpolation_type,
                ),
                T.ConvertImageDtype(torch.float32),
                T.RandomResizeAndCrop(
                    crop_size=crop_size,
                    scale_range=scale_range,
                    resize_prob=resize_prob,
                    scaling_type=scaling_type,
                    interpolation_type=scale_interpolation_type,
                ),
                T.RandomHorizontalFlip(horizontal_flip_prob),
                # occlusion after flip, otherwise we're occluding the reference image
                T.RandomOcclusion(p=occlusion_prob, occlusion_px_range=occlusion_px_range),
                T.RandomErase(p=erase_prob, erase_px_range=erase_px_range, num_repeats=erase_num_repeats),
                T.Normalize(mean=mean, std=std),
                T.MakeValidDisparityMask(max_disparity),
                T.ValidateModelInput(),
            ]
        )

        self.transforms = T.Compose(transforms)

    # NOTE: parameter names fixed from "disparties"/"mask" to match the
    # eval preset's signature; callers use positional arguments.
    def forward(self, images, disparities, masks):
        """Apply the composed training transforms to a stereo sample."""
        return self.transforms(images, disparities, masks)
Loading