address #330

lucidrains · lucidrains · commit 82f2fa751d9a · 2024-10-04T07:01:48.000-07:00
diff --git a/setup.py b/setup.py
@@ -6,7 +6,7 @@
 setup(
   name = 'vit-pytorch',
   packages = find_packages(exclude=['examples']),
-  version = '1.7.12',
+  version = '1.7.14',
   license='MIT',
   description = 'Vision Transformer (ViT) - Pytorch',
   long_description=long_description,
diff --git a/vit_pytorch/regionvit.py b/vit_pytorch/regionvit.py
@@ -20,6 +20,18 @@ def divisible_by(val, d):
 
 # helper classes
 
+class ChanLayerNorm(nn.Module):
+    """Normalize over dim 1 (biased variance), then apply learned scale `g` and shift `b` of shape (1, dim, 1, 1)."""
+    def __init__(self, dim, eps = 1e-5):
+        super().__init__()
+        self.eps = eps  # added to the variance before the square root
+        self.g = nn.Parameter(torch.ones(1, dim, 1, 1))
+        self.b = nn.Parameter(torch.zeros(1, dim, 1, 1))
+
+    def forward(self, x):
+        mean, var = x.mean(dim = 1, keepdim = True), x.var(dim = 1, unbiased = False, keepdim = True)
+        return (x - mean) / torch.sqrt(var + self.eps) * self.g + self.b
+
 class Downsample(nn.Module):
     def __init__(self, dim_in, dim_out):
         super().__init__()
@@ -212,10 +224,10 @@ def __init__(
         if tokenize_local_3_conv:
             self.local_encoder = nn.Sequential(
                 nn.Conv2d(3, init_dim, 3, 2, 1),
-                nn.LayerNorm(init_dim),
+                ChanLayerNorm(init_dim),
                 nn.GELU(),
                 nn.Conv2d(init_dim, init_dim, 3, 2, 1),
-                nn.LayerNorm(init_dim),
+                ChanLayerNorm(init_dim),
                 nn.GELU(),
                 nn.Conv2d(init_dim, init_dim, 3, 1, 1)
             )