Skip to content

Commit 2a4ec90

Browse files
authored
Fix for breaking changes in xformers 0.0.21 (#834)
1 parent 85ebcda commit 2a4ec90

File tree

2 files changed

+4
-3
lines changed

2 files changed

+4
-3
lines changed

requirements.txt

+1-1
```diff
@@ -5,7 +5,7 @@ sentencepiece  # Required for LLaMA tokenizer.
 numpy
 torch >= 2.0.0
 transformers >= 4.31.0  # Required for LLaMA-2.
-xformers >= 0.0.19
+xformers >= 0.0.21
 fastapi
 uvicorn
 pydantic < 2  # Required for OpenAI server.
```

vllm/model_executor/layers/attention.py

+3-2
```diff
@@ -357,11 +357,12 @@ def set_attn_bias(self, input_metadata: InputMetadata) -> None:
             # be sliced from a tensor whose length is a multiple of 8.
             padded_len = (prompt_len + 7) // 8 * 8
             bias = torch.empty(
+                1,  # batch_size
                 self.num_heads,
-                padded_len,
+                prompt_len,
                 padded_len,
                 device=self.alibi_slopes.device,
-            )[:, :prompt_len, :prompt_len].copy_(bias)
+            )[:, :, :, :prompt_len].copy_(bias)
             bias.mul_(self.alibi_slopes[:, None, None])
             attn_bias = LowerTriangularMaskWithTensorBias(bias)
             input_metadata.attn_bias.append(attn_bias)
```

0 commit comments

Comments
 (0)