Adjust memory usage, add command line options, make sub-quadratic the default if CUDA is unavailable, and change the sub-quadratic AttnBlock forward to use the same implementation the web UI uses for xformers.
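A minimal sketch of the dispatch behavior this describes, under assumed names (`pick_attnblock_forward` and `xformers_attnblock_forward` are hypothetical, not identifiers from this diff); the actual selection logic is not shown in this excerpt:

import torch

def pick_attnblock_forward(cmd_opts):
    # Forced on by --opt-sub-quad-attention, and the default fallback
    # whenever CUDA is unavailable (per the commit message above).
    if cmd_opts.opt_sub_quad_attention or not torch.cuda.is_available():
        return sub_quad_attnblock_forward
    return xformers_attnblock_forward  # assumed name for the CUDA/xformers path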
# MemoryEfficientAttnBlock forward from https://github.com/Stability-AI/stablediffusion modified to use sub-quadratic attention instead of xformers
def sub_quad_attnblock_forward(self, x):
    h_ = x
    h_ = self.norm(h_)
    q = self.q(h_)
    k = self.k(h_)
    v = self.v(h_)

    # compute attention
    B, C, H, W = q.shape
    q, k, v = map(lambda x: rearrange(x, 'b c h w -> b (h w) c'), (q, k, v))
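The excerpt cuts off here. As a hedged sketch, the forward plausibly continues along the lines below, following the MemoryEfficientAttnBlock pattern it adapts; `sub_quadratic_attention`, its keyword names, and the `shared.cmd_opts` lookups are assumptions keyed to the options added in modules/shared.py below, not lines shown in this diff:

    # ASSUMED continuation (not shown in the excerpt above):
    out = sub_quadratic_attention(
        q, k, v,
        q_chunk_size=shared.cmd_opts.sub_quad_q_chunk_size,
        kv_chunk_size=shared.cmd_opts.sub_quad_kv_chunk_size,
        chunk_threshold_bytes=shared.cmd_opts.sub_quad_chunk_threshold,
    )
    out = rearrange(out, 'b (h w) c -> b c h w', h=H, w=W)  # back to a feature map
    out = self.proj_out(out)
    return x + out  # residual connection, as in the original AttnBlock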
modules/shared.py (+5 -2)

@@ -56,8 +56,11 @@
 parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work")
 parser.add_argument("--deepdanbooru", action='store_true', help="does not do anything")
 parser.add_argument("--opt-split-attention", action='store_true', help="force-enables Doggettx's cross-attention layer optimization. By default, it's on for torch cuda.")
 parser.add_argument("--opt-split-attention-invokeai", action='store_true', help="force-enables InvokeAI's cross-attention layer optimization. By default, it's on when cuda is unavailable.")
+parser.add_argument("--opt-sub-quad-attention", action='store_true', help="enable memory efficient sub-quadratic cross-attention layer optimization. By default, it's on when cuda is unavailable.")
+parser.add_argument("--sub-quad-q-chunk-size", type=int, help="query chunk size for the sub-quadratic cross-attention layer optimization to use", default=1024)
+parser.add_argument("--sub-quad-kv-chunk-size", type=int, help="kv chunk size for the sub-quadratic cross-attention layer optimization to use", default=None)
+parser.add_argument("--sub-quad-chunk-threshold", type=int, help="the size threshold in bytes for the sub-quadratic cross-attention layer optimization to use chunking", default=None)
 parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find")