2 files changed, +17 / -9 lines

@@ -275,11 +275,11 @@ def _compare_tp(
     if load_format == "dummy":
         # Avoid OOM
         text_overrides = {
-            "num_layers": 1,
-            "num_hidden_layers": 1,
-            "num_experts": 2,
-            "num_experts_per_tok": 2,
-            "num_local_experts": 2,
+            "num_hidden_layers": 4,
+            "hidden_size": 512,
+            "intermediate_size": 800,
+            "num_attention_heads": 4,
+            "num_key_value_heads": 1,
         }

     if is_multimodal:
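
The replacement values trade the old degenerate shapes (a single layer, two experts) for a small but structurally realistic dense model. As a minimal sketch of how overrides like these are typically applied, assuming a `transformers`-style config object (the `shrunken_config` helper and the loading code below are illustrative, not the test's actual implementation):

```python
from transformers import AutoConfig, PretrainedConfig

def shrunken_config(model_id: str) -> PretrainedConfig:
    """Shrink a model's text config so a dummy-weight TP test avoids OOM."""
    text_overrides = {
        "num_hidden_layers": 4,    # a few real layers, not just one
        "hidden_size": 512,
        "intermediate_size": 800,
        "num_attention_heads": 4,
        "num_key_value_heads": 1,  # exercises the grouped-query path
    }
    config = AutoConfig.from_pretrained(model_id)
    for key, value in text_overrides.items():
        # Only override attributes the architecture actually defines.
        if hasattr(config, key):
            setattr(config, key, value)
    return config
```

With `load_format="dummy"` the weights are randomly initialized rather than downloaded, so the shapes above are what determine the test's memory footprint.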
@@ -6,6 +6,7 @@
 import torch
 import torch.nn as nn

+from vllm.config import VllmConfig
 from vllm.model_executor.layers.sampler import SamplerOutput
 from vllm.sequence import ExecuteModelRequest
 from vllm.spec_decode.interfaces import SpeculativeProposals
@@ -25,11 +26,18 @@ class NGramWorker(NonLLMProposerWorkerBase):
     which don't rely on LLM model to give proposals.
     """

-    def __init__(self, *args, **kwargs):
+    def __init__(
+        self,
+        vllm_config: VllmConfig,
+        local_rank: int,
+        device_type: str = "cuda",
+        **kwargs,
+    ):
+        super().__init__(vllm_config)
+
         # Get local_rank/vocab_size from kwargs attribute
-        self.local_rank = kwargs["local_rank"]
-        self.vocab_size = kwargs["vllm_config"].model_config.get_vocab_size()
-        self.device_type = kwargs.get("device_type", "cuda")
+        self.local_rank = local_rank
+        self.device_type = device_type

         # Lazy initialization list.
         self._proposer: Top1Proposer
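
This hunk replaces the opaque `*args, **kwargs` constructor with named parameters and forwards `vllm_config` to the base class; the old `self.vocab_size` assignment disappears, presumably because the base initializer now derives it from the config. A small, self-contained sketch of the same pattern (class names here are stand-ins, not vLLM's actual API):

```python
from dataclasses import dataclass

@dataclass
class DummyConfig:
    vocab_size: int = 32_000

class BaseWorker:
    """Stand-in for NonLLMProposerWorkerBase; assumed to own the config."""
    def __init__(self, config: DummyConfig):
        self.config = config
        self.vocab_size = config.vocab_size  # derived once, in one place

class ExplicitArgsWorker(BaseWorker):
    """Named parameters replace dict lookups into **kwargs."""
    def __init__(self, config: DummyConfig, local_rank: int,
                 device_type: str = "cuda", **kwargs):
        super().__init__(config)        # base class stores the config
        self.local_rank = local_rank    # was: kwargs["local_rank"]
        self.device_type = device_type  # was: kwargs.get("device_type", "cuda")

# Extra factory-only keywords are still absorbed without error:
worker = ExplicitArgsWorker(DummyConfig(), local_rank=0, extra_flag=True)
assert worker.device_type == "cuda" and worker.vocab_size == 32_000
```

With named parameters, a missing `local_rank` now fails with a clear `TypeError` at construction time instead of a `KeyError` inside the body, and type checkers can see the signature.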