vllm-project · DarkLight1337 · Jan 17, 2025 · Jan 17, 2025 · Jan 17, 2025 · Jan 17, 2025
diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py
@@ -18,6 +18,7 @@
                                                      Tekkenizer)
 
 from vllm.logger import init_logger
+from vllm.utils import is_list_of
 
 if TYPE_CHECKING:
     from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
@@ -27,7 +28,7 @@
 
 @dataclass
 class Encoding:
-    input_ids: List[int]
+    input_ids: Union[List[int], List[List[int]]]
 
 
 def maybe_serialize_tool_calls(request: ChatCompletionRequest):
@@ -223,12 +224,27 @@ def __len__(self) -> int:
 
     def __call__(
         self,
-        prompt: str,
+        prompt: Union[str, List[str], List[int]],
         add_special_tokens: bool = False,
         truncation: bool = False,
         max_length: Optional[int] = None,
     ):
+        # For List[str]: a batch of raw prompt texts, each encoded separately
+        if is_list_of(prompt, str):
+            all_input_ids = []
+            for p in prompt:
+                input_ids = self.encode(p)
+                if truncation:
+                    input_ids = input_ids[:max_length]
+                all_input_ids.append(input_ids)
+            return Encoding(input_ids=all_input_ids)
+
+        # For List[int]: already token IDs (chat template output); pass through
+        if is_list_of(prompt, int):
+            return Encoding(input_ids=prompt)
+
         # Mistral Tokenizers should not add special tokens
+        assert isinstance(prompt, str), f"Invalid prompt: {prompt}"
         input_ids = self.encode(prompt)
 
         if truncation: