We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent dcc96c9, commit fbd3ac7. Copy full SHA for fbd3ac7
src/diffusers/models/sub_quadratic_attention.py
@@ -173,6 +173,14 @@ def get_query_chunk(chunk_idx: int) -> Tensor:
173
summarize_chunk=summarize_chunk,
174
)
175
176
+
177
+ if q_tokens <= query_chunk_size:
178
+ # fast-path for when there's just 1 query chunk
179
+ return compute_query_chunk_attn(
180
+ query=query,
181
+ key=key,
182
+ value=value,
183
+ )
184
185
# TODO: maybe we should use torch.empty_like(query) to allocate storage in-advance,
186
# and pass slices to be mutated, instead of torch.cat()ing the returned slices
0 commit comments