Commit 8dc7f11

Fix performance issue introduced by torch cuda cache clear during generation
1 parent 4b4111a commit 8dc7f11

1 file changed

ldm/modules/attention.py

Lines changed: 0 additions & 1 deletion
@@ -282,7 +282,6 @@ def einsum_op_cuda(self, q, k, v):
 
     def get_attention_mem_efficient(self, q, k, v):
         if q.device.type == 'cuda':
-            torch.cuda.empty_cache()
             #print("in get_attention_mem_efficient with q shape", q.shape, ", k shape", k.shape, ", free memory is", get_mem_free_total(q.device))
             return self.einsum_op_cuda(q, k, v)
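
For context: torch.cuda.empty_cache() releases the blocks held by PyTorch's caching allocator back to the CUDA driver, so calling it on every attention step forces subsequent allocations to go through the driver again instead of being served from the cache, which is the likely source of the slowdown this commit fixes. Below is a minimal, self-contained timing sketch (not part of this commit; the tensor shapes and iteration count are arbitrary assumptions) comparing an einsum-based attention-score computation with and without a per-iteration cache clear.

import time
import torch

def time_attention_scores(iterations=50, clear_cache=False):
    # Arbitrary example shapes: (batch * heads, tokens, dim_head).
    q = torch.randn(8, 1024, 64, device='cuda')
    k = torch.randn(8, 1024, 64, device='cuda')
    torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(iterations):
        if clear_cache:
            # Returns cached allocator blocks to the driver, so the next
            # allocation has to go through the CUDA driver again.
            torch.cuda.empty_cache()
        _ = torch.einsum('b i d, b j d -> b i j', q, k)  # attention scores
    torch.cuda.synchronize()
    return time.perf_counter() - start

if torch.cuda.is_available():
    print(f"without empty_cache(): {time_attention_scores(clear_cache=False):.3f}s")
    print(f"with empty_cache():    {time_attention_scores(clear_cache=True):.3f}s")

On a typical GPU the clear_cache=True run is noticeably slower, which is consistent with the motivation for dropping the call from get_attention_mem_efficient.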

0 commit comments