Commit e9a6be2

LucasWilkinson authored and lulmer committed
[Bugfix] DeepSeek Accuracy (vllm-project#14476)
Signed-off-by: Lucas Wilkinson <[email protected]>
Signed-off-by: Louis Ulmer <[email protected]>
1 parent 2497bf8 commit e9a6be2

File tree

1 file changed: +7 −5 lines


vllm/v1/attention/backends/mla/common.py

Lines changed: 7 additions & 5 deletions
@@ -222,8 +222,7 @@
     Fp8LinearGenericOp, current_platform_fp8_dtype, is_fp8)
 from vllm.model_executor.layers.quantization.utils.quant_utils import (
     scaled_quantize)
-from vllm.model_executor.layers.rotary_embedding import (
-    DeepseekScalingRotaryEmbedding, RotaryEmbedding)
+from vllm.model_executor.layers.rotary_embedding import RotaryEmbedding
 from vllm.utils import cdiv, round_down
 
 try:
@@ -626,9 +625,12 @@ def __init__(
         self.qk_head_dim = qk_head_dim
         self.v_head_dim = v_head_dim
 
-        self.rotary_emb = rotary_emb
-        self.use_yarn_rope = isinstance(rotary_emb,
-                                        DeepseekScalingRotaryEmbedding)
+        # Hack for V1 for now to avoid torch library overhead (since we are
+        # already inside an attention custom op), pull out the forward
+        # method from the rotary embedding and call it directly
+        # TODO(lucas): we should probably find a cleaner way to do this
+        self.rotary_emb = rotary_emb._forward_method
+
         self.q_proj = q_proj
         self.kv_b_proj = kv_b_proj
         self.o_proj = o_proj
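
The substance of the change is in the second hunk: instead of keeping a reference to the rotary embedding module (plus an isinstance check against DeepseekScalingRotaryEmbedding), the MLA backend now stores the embedding's already-resolved _forward_method and calls it directly, since the surrounding code already runs inside an attention custom op and the extra torch library dispatch is pure overhead. The sketch below illustrates that pattern under simplifying assumptions: ToyRotaryEmbedding, forward_native, and the tensor shapes are made up for illustration; only the _forward_method attribute and the stated motivation come from the diff above.

import torch
from torch import nn


class ToyRotaryEmbedding(nn.Module):
    """Toy stand-in for a rotary-embedding layer (hypothetical, illustration only)."""

    def __init__(self) -> None:
        super().__init__()
        # Resolve the concrete implementation once at construction time,
        # mimicking a layer that exposes its chosen forward as _forward_method.
        self._forward_method = self.forward_native

    def forward_native(self, positions, query, key):
        # A real implementation would apply rotary position embeddings here;
        # this stub just returns the inputs unchanged.
        return query, key

    def forward(self, positions, query, key):
        # The normal module entry point; in vLLM this path can additionally be
        # wrapped as a torch library custom op, adding per-call dispatch cost.
        return self._forward_method(positions, query, key)


rope = ToyRotaryEmbedding()
positions = torch.zeros(4, dtype=torch.long)
q = torch.randn(4, 64)
k = torch.randn(4, 64)

# Ordinary call: goes through nn.Module.__call__ (and any custom-op wrapper).
q1, k1 = rope(positions, q, k)

# Pattern from the commit: grab the resolved method once and call it directly,
# skipping the wrapper since the caller is already inside an attention custom op.
rope_forward = rope._forward_method
q2, k2 = rope_forward(positions, q, k)

As the in-diff TODO notes, this is a stopgap: it trades whatever wrapping the module's public call path provides for lower per-call dispatch overhead inside the attention kernel.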
