
Commit 4ae54b3

[attention] Fix attention (#2656)
* [attention] Fix attention
* fix
* correct
1 parent fa7a576 commit 4ae54b3

2 files changed: +6 -3 lines changed


Diff for: src/diffusers/models/attention.py (+5 -2)

```diff
@@ -271,9 +271,10 @@ def __init__(
     def forward(
         self,
         hidden_states,
+        attention_mask=None,
         encoder_hidden_states=None,
+        encoder_attention_mask=None,
         timestep=None,
-        attention_mask=None,
         cross_attention_kwargs=None,
         class_labels=None,
     ):
@@ -302,12 +303,14 @@ def forward(
             norm_hidden_states = (
                 self.norm2(hidden_states, timestep) if self.use_ada_layer_norm else self.norm2(hidden_states)
             )
+            # TODO (Birch-San): Here we should prepare the encoder_attention mask correctly
+            # prepare attention mask here

             # 2. Cross-Attention
             attn_output = self.attn2(
                 norm_hidden_states,
                 encoder_hidden_states=encoder_hidden_states,
-                attention_mask=attention_mask,
+                attention_mask=encoder_attention_mask,
                 **cross_attention_kwargs,
             )
             hidden_states = attn_output + hidden_states
```
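The substance of the fix is that the self-attention mask and the cross-attention mask cover different key sequences: self-attention keys are the image tokens themselves, while cross-attention keys come from `encoder_hidden_states` and generally have a different length, so a single `attention_mask` argument cannot serve both. A minimal standalone sketch in plain PyTorch (not the diffusers implementation; all sizes are illustrative) of why the old routing breaks:

```python
import torch

batch, img_tokens, txt_tokens, dim = 2, 64, 77, 320

hidden_states = torch.randn(batch, img_tokens, dim)          # query stream (image tokens)
encoder_hidden_states = torch.randn(batch, txt_tokens, dim)  # key/value stream (text tokens)

# Additive masks: 0.0 keeps a position, a large negative value drops it.
attention_mask = torch.zeros(batch, img_tokens, img_tokens)          # self-attn: keys are image tokens
encoder_attention_mask = torch.zeros(batch, img_tokens, txt_tokens)  # cross-attn: keys are text tokens

def attend(q, k, v, mask):
    # Scaled dot-product attention with an additive mask on the scores.
    scores = q @ k.transpose(-1, -2) / k.shape[-1] ** 0.5
    return (scores + mask).softmax(dim=-1) @ v

# Cross-attention: queries from the image stream, keys/values from the text encoder.
out = attend(hidden_states, encoder_hidden_states, encoder_hidden_states, encoder_attention_mask)

# Before this commit, `attention_mask` (shaped [batch, 64, 64]) was forwarded here
# instead, which cannot broadcast against the [batch, 64, 77] score matrix.
```

Splitting the parameters lets a caller mask padded text tokens in cross-attention, via `encoder_attention_mask`, without disturbing the self-attention mask.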

Diff for: tests/pipelines/stable_diffusion/test_stable_diffusion.py (+1 -1)

```diff
@@ -737,7 +737,7 @@ def test_stable_diffusion_vae_tiling(self):

         # make sure that more than 4 GB is allocated
         mem_bytes = torch.cuda.max_memory_allocated()
-        assert mem_bytes > 4e9
+        assert mem_bytes > 5e9
         assert np.abs(image_chunked.flatten() - image.flatten()).max() < 1e-2

     def test_stable_diffusion_fp16_vs_autocast(self):
```
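The test change only raises the peak-memory threshold that the VAE tiling test asserts against. For reference, the measurement pattern it relies on looks like this (a standalone sketch assuming a CUDA device is available; the tensor sizes are illustrative, not taken from the test):

```python
import torch

# Clear the high-water mark so the peak reflects only what follows.
torch.cuda.reset_peak_memory_stats()

# Allocate ~2.1e9 bytes of float32, then a temporary that doubles the peak.
x = torch.randn(1024, 1024, 512, device="cuda")
y = x * 2

mem_bytes = torch.cuda.max_memory_allocated()
print(f"peak allocation: {mem_bytes / 1e9:.2f} GB")
assert mem_bytes > 4e9  # analogous check; the test used 4e9 before this commit, 5e9 after
```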
