File tree: 3 files changed (+22, -11 lines changed)
File 1:

@@ -24,7 +24,8 @@
                                          RequestResponseMetadata, ToolCall, UsageInfo)
 from vllm.entrypoints.openai.reasoning_parsers import (ReasoningParser,
                                                        ReasoningParserManager)
-from vllm.entrypoints.openai.serving_engine import OpenAIServing
+from vllm.entrypoints.openai.serving_engine import (OpenAIServing,
+                                                    clamp_prompt_logprobs)
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
 from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import (
@@ -844,7 +845,7 @@ async def chat_completion_full_generator(
             model=model_name,
             choices=choices,
             usage=usage,
-            prompt_logprobs=final_res.prompt_logprobs,
+            prompt_logprobs=clamp_prompt_logprobs(final_res.prompt_logprobs),
         )

         return response
File 2:

@@ -23,7 +23,8 @@
                                               RequestResponseMetadata,
                                               UsageInfo)
 # yapf: enable
-from vllm.entrypoints.openai.serving_engine import OpenAIServing
+from vllm.entrypoints.openai.serving_engine import (OpenAIServing,
+                                                    clamp_prompt_logprobs)
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.logger import init_logger
 from vllm.outputs import RequestOutput
@@ -394,13 +395,7 @@ def request_output_to_completion_response(
         for final_res in final_res_batch:
             prompt_token_ids = final_res.prompt_token_ids
             assert prompt_token_ids is not None
-            prompt_logprobs = final_res.prompt_logprobs
-            if prompt_logprobs:
-                for logprob_dict in prompt_logprobs:
-                    if logprob_dict:
-                        for logprob_values in logprob_dict.values():
-                            if logprob_values.logprob == float('-inf'):
-                                logprob_values.logprob = -9999.0
+            prompt_logprobs = clamp_prompt_logprobs(final_res.prompt_logprobs)
             prompt_text = final_res.prompt

             token_ids: GenericSequence[int]
File 3:

@@ -42,7 +42,7 @@
 from vllm.pooling_params import PoolingParams
 from vllm.prompt_adapter.request import PromptAdapterRequest
 from vllm.sampling_params import BeamSearchParams, SamplingParams
-from vllm.sequence import Logprob
+from vllm.sequence import Logprob, PromptLogprobs
 from vllm.tracing import (contains_trace_headers, extract_trace_headers,
                           log_tracing_disabled_warning)
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
@@ -535,3 +535,18 @@ def _get_model_name(self,
         if model_name is None:
             return self.models.base_model_paths[0].name
         return model_name
+
+
+def clamp_prompt_logprobs(
+        prompt_logprobs: Union[PromptLogprobs,
+                               None]) -> Union[PromptLogprobs, None]:
+    if prompt_logprobs is None:
+        return prompt_logprobs
+
+    for logprob_dict in prompt_logprobs:
+        if logprob_dict is None:
+            continue
+        for logprob_values in logprob_dict.values():
+            if logprob_values.logprob == float('-inf'):
+                logprob_values.logprob = -9999.0
+    return prompt_logprobs
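The new clamp_prompt_logprobs helper walks a prompt's per-token logprob dicts (entries may be None, e.g. for positions with no recorded logprobs) and rewrites any -inf logprob to -9999.0 in place, presumably so prompt logprobs remain representable in the JSON returned by the OpenAI-compatible endpoints. Below is a minimal, self-contained sketch of the same behavior; the Logprob dataclass and PromptLogprobs alias are local stand-ins for the vllm.sequence types, not the real definitions.

from dataclasses import dataclass
from typing import Dict, List, Optional

# Local stand-in for vllm.sequence.Logprob: assumed to be a mutable
# container with a float `logprob` field.
@dataclass
class Logprob:
    logprob: float

# One entry per prompt token: either None (no logprobs recorded for that
# position) or a mapping from token id to its Logprob.
PromptLogprobs = List[Optional[Dict[int, Logprob]]]

def clamp_prompt_logprobs(
        prompt_logprobs: Optional[PromptLogprobs]
) -> Optional[PromptLogprobs]:
    # Mirror of the helper added above: clamp -inf values in place and
    # return the same object so call sites can use it inline.
    if prompt_logprobs is None:
        return prompt_logprobs
    for logprob_dict in prompt_logprobs:
        if logprob_dict is None:
            continue
        for logprob_values in logprob_dict.values():
            if logprob_values.logprob == float('-inf'):
                logprob_values.logprob = -9999.0
    return prompt_logprobs

# Usage: the first prompt token has no logprobs; the second has an -inf
# entry that gets clamped in place.
logprobs: PromptLogprobs = [None, {42: Logprob(float('-inf')), 7: Logprob(-0.3)}]
clamp_prompt_logprobs(logprobs)
assert logprobs[1][42].logprob == -9999.0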