diff --git a/vllm/attention/backends/xformers.py b/vllm/attention/backends/xformers.py index 8c25dda7aad..49f47f9c8de 100644 --- a/vllm/attention/backends/xformers.py +++ b/vllm/attention/backends/xformers.py @@ -199,6 +199,8 @@ def prefill_metadata(self) -> Optional["XFormersMetadata"]: # Compute some attn_metadata fields which default to None query_start_loc = (None if self.query_start_loc is None else self.query_start_loc[:self.num_prefills + 1]) + seq_start_loc = (None if self.seq_start_loc is None else + self.seq_start_loc[:self.num_prefills + 1]) slot_mapping = (None if self.slot_mapping is None else self.slot_mapping[:self.num_prefill_tokens]) seq_lens = (None if self.seq_lens is None else @@ -225,6 +227,7 @@ def prefill_metadata(self) -> Optional["XFormersMetadata"]: max_prefill_seq_len=self.max_prefill_seq_len, max_decode_seq_len=0, query_start_loc=query_start_loc, + seq_start_loc=seq_start_loc, context_lens_tensor=context_lens_tensor, block_tables=block_tables, use_cuda_graph=False,