Below is the error output; this is the error when running inside the container:
```
Traceback (most recent call last):
  File "/usr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/entrypoints/openai/api_server.py", line 624, in <module>
    engine = AsyncLLMEngine.from_engine_args(engine_args)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/engine/async_llm_engine.py", line 232, in from_engine_args
    engine = cls(engine_args.worker_use_ray,
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/engine/async_llm_engine.py", line 55, in __init__
    self.engine = engine_class(*args, **kwargs)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/engine/llm_engine.py", line 104, in __init__
    self._init_cache()
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/engine/llm_engine.py", line 182, in _init_cache
    num_blocks = self._run_workers(
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/engine/llm_engine.py", line 470, in _run_workers
    output = executor(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/worker/worker.py", line 108, in profile_num_available_blocks
    self.model(
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/model_executor/models/baichuan.py", line 292, in forward
    hidden_states = self.model(input_ids, positions, kv_caches,
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/model_executor/models/baichuan.py", line 260, in forward
    hidden_states = layer(
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/model_executor/models/baichuan.py", line 210, in forward
    hidden_states = self.self_attn(
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/model_executor/models/baichuan.py", line 169, in forward
    attn_output = self.attn(q, k, v, k_cache, v_cache, input_metadata,
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/model_executor/layers/attention.py", line 202, in forward
    self.multi_query_kv_attention(
  File "/data1/zhangxing/pyprojects/vllm-main/vllm/model_executor/layers/attention.py", line 399, in multi_query_kv_attention
    out = xops.memory_efficient_attention_forward(
  File "/usr/local/lib/python3.8/dist-packages/xformers/ops/fmha/__init__.py", line 214, in memory_efficient_attention_forward
    return _memory_efficient_attention_forward(
  File "/usr/local/lib/python3.8/dist-packages/xformers/ops/fmha/__init__.py", line 304, in _memory_efficient_attention_forward
    inp.validate_inputs()
  File "/usr/local/lib/python3.8/dist-packages/xformers/ops/fmha/common.py", line 120, in validate_inputs
    raise ValueError(
ValueError: Invalid shape for attention bias: torch.Size([40, 10, 10]) (expected (1, 40, 10, 10))
 query.shape: torch.Size([1, 10, 40, 128])
 key.shape  : torch.Size([1, 10, 40, 128])
 value.shape: torch.Size([1, 10, 40, 128])
```
Hi @Zhang-star-master, thanks for reporting the issue. The bug was fixed by the recent PR #834. As we haven't made a new release yet, could you please install the latest vLLM from source?
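
For context on the shape mismatch itself: the query in the trace is laid out as [batch, seq_len, num_heads, head_dim] = [1, 10, 40, 128] (40 heads of dim 128, matching Baichuan-13B), while the attention bias, presumably the ALiBi bias that Baichuan uses, was built as [num_heads, seq_len, seq_len] = [40, 10, 10], i.e. without the leading batch dimension that this xformers version's validate_inputs() requires. A minimal sketch of the shape issue, illustrative only and not the actual PR #834 diff:

```python
import torch

# Shapes taken from the traceback above: 40 attention heads, head dim 128,
# and a sequence length of 10 during the profiling run.
num_heads, seq_len = 40, 10

# A bias built without a batch axis, as in the failing run:
attn_bias = torch.zeros(num_heads, seq_len, seq_len)
print(attn_bias.shape)  # torch.Size([40, 10, 10]) -- rejected by validate_inputs()

# Prepending a batch dimension yields the 4-D layout xformers expects
# next to a [batch, seq_len, num_heads, head_dim] query:
attn_bias = attn_bias.unsqueeze(0)
print(attn_bias.shape)  # torch.Size([1, 40, 10, 10]) -- the expected shape
```

After reinstalling from source with the fix included, the bias construction should already produce the 4-D shape, so no manual workaround is needed.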