@@ -681,7 +681,6 @@ async def create_stream(

         chunk = None
         stop_reason = None
-        maybe_model = None
         content_chunks: List[str] = []
         full_tool_calls: List[FunctionCall] = []
         completion_tokens = 0
@@ -695,7 +694,6 @@ async def create_stream(

                 # set the stop_reason for the usage chunk to the prior stop_reason
                 stop_reason = chunk.done_reason if chunk.done and stop_reason is None else stop_reason
-                maybe_model = chunk.model
                 # First try get content
                 if chunk.message.content is not None:
                     content_chunks.append(chunk.message.content)
@@ -732,9 +730,6 @@ async def create_stream(
             except StopAsyncIteration:
                 break

-        model = maybe_model or create_args["model"]
-        model = model.replace("gpt-35", "gpt-3.5")  # hack for Azure API
-
         if chunk and chunk.prompt_eval_count:
             prompt_tokens = chunk.prompt_eval_count
         else:
@@ -857,6 +852,7 @@ def model_info(self) -> ModelInfo:
         return self._model_info


+# TODO: see if response_format can just be a json blob instead of a BaseModel
 class OllamaChatCompletionClient(BaseOllamaChatCompletionClient, Component[BaseOllamaClientConfigurationConfigModel]):
     """Chat completion client for Ollama hosted models.

@@ -866,6 +862,7 @@ class OllamaChatCompletionClient(BaseOllamaChatCompletionClient, Component[BaseO
         model (str): Which Ollama model to use.
         host (optional, str): Model host url.
         response_format (optional, pydantic.BaseModel): The format of the response. If provided, the response will be parsed into this format as json.
+        options (optional, Mapping[str, Any] | Options): Additional options to pass to the Ollama client.
         model_info (optional, ModelInfo): The capabilities of the model. **Required if the model is not listed in the ollama model info.**

     Note:
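For reference, a minimal usage sketch of the parameters documented in the docstring above. The keyword names (`model`, `host`, `options`, `response_format`) come from that docstring; the `WeatherReport` model, the host URL, and the specific option values are illustrative assumptions, not part of this change.

```python
# Sketch only: constructor keywords assumed to match the class docstring above.
from pydantic import BaseModel

from autogen_ext.models.ollama import OllamaChatCompletionClient


class WeatherReport(BaseModel):
    # Hypothetical structured-output schema used for response_format.
    city: str
    temperature_c: float


client = OllamaChatCompletionClient(
    model="llama3.2",
    host="http://localhost:11434",  # assumed local Ollama default
    # Passed through to the Ollama client; keys follow Ollama's Options.
    options={"temperature": 0.2, "num_ctx": 8192},
    # Responses are parsed into this pydantic model as JSON.
    response_format=WeatherReport,
)
```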