Add fast path for the single-query-chunk case (q_tokens <= query_chunk_size)

Birch-san · Birch-san · commit fbd3ac7de143 · 2022-12-28T02:00:48.000Z
diff --git a/src/diffusers/models/sub_quadratic_attention.py b/src/diffusers/models/sub_quadratic_attention.py
@@ -173,6 +173,14 @@ def get_query_chunk(chunk_idx: int) -> Tensor:
             summarize_chunk=summarize_chunk,
         )
     )
+
+    if q_tokens <= query_chunk_size:
+        # fast-path for when there's just 1 query chunk
+        return compute_query_chunk_attn(
+            query=query,
+            key=key,
+            value=value,
+        )
     
     # TODO: maybe we should use torch.empty_like(query) to allocate storage in-advance,
     # and pass slices to be mutated, instead of torch.cat()ing the returned slices

Original file line number	Diff line number	Diff line change
`@@ -173,6 +173,14 @@ def get_query_chunk(chunk_idx: int) -> Tensor:`
`173`	`173`	`summarize_chunk=summarize_chunk,`
`174`	`174`	`)`
`175`	`175`	`)`
	`176`	`+`
	`177`	`+ if q_tokens <= query_chunk_size:`
	`178`	`+ # fast-path for when there's just 1 query chunk`
	`179`	`+ return compute_query_chunk_attn(`
	`180`	`+ query=query,`
	`181`	`+ key=key,`
	`182`	`+ value=value,`
	`183`	`+ )`
`176`	`184`
`177`	`185`	`# TODO: maybe we should use torch.empty_like(query) to allocate storage in-advance,`
`178`	`186`	`# and pass slices to be mutated, instead of torch.cat()ing the returned slices`