@@ -681,7 +681,6 @@ async def create_stream(

         chunk = None
         stop_reason = None
-        maybe_model = None
         content_chunks: List[str] = []
         full_tool_calls: List[FunctionCall] = []
         completion_tokens = 0
@@ -695,7 +694,6 @@ async def create_stream(

                 # set the stop_reason for the usage chunk to the prior stop_reason
                 stop_reason = chunk.done_reason if chunk.done and stop_reason is None else stop_reason
-                maybe_model = chunk.model
                 # First try get content
                 if chunk.message.content is not None:
                     content_chunks.append(chunk.message.content)
@@ -732,9 +730,6 @@ async def create_stream(
             except StopAsyncIteration:
                 break

-        model = maybe_model or create_args["model"]
-        model = model.replace("gpt-35", "gpt-3.5")  # hack for Azure API
-
         if chunk and chunk.prompt_eval_count:
             prompt_tokens = chunk.prompt_eval_count
         else:
@@ -857,6 +852,7 @@ def model_info(self) -> ModelInfo:
         return self._model_info


+# TODO: see if response_format can just be a json blob instead of a BaseModel
 class OllamaChatCompletionClient(BaseOllamaChatCompletionClient, Component[BaseOllamaClientConfigurationConfigModel]):
     """Chat completion client for Ollama hosted models.

@@ -866,6 +862,7 @@ class OllamaChatCompletionClient(BaseOllamaChatCompletionClient, Component[BaseO
         model (str): Which Ollama model to use.
         host (optional, str): Model host url.
         response_format (optional, pydantic.BaseModel): The format of the response. If provided, the response will be parsed into this format as json.
+        options (optional, Mapping[str, Any] | Options): Additional options to pass to the Ollama client.
         model_info (optional, ModelInfo): The capabilities of the model. **Required if the model is not listed in the ollama model info.**

     Note:
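For reference, a minimal usage sketch of the parameters documented in the docstring above. The keyword names (`model`, `host`, `options`, `response_format`) come from that docstring; the `WeatherReport` model, the host URL, and the specific option values are illustrative assumptions, not part of this change.

```python
# Sketch only: constructor keywords assumed to match the class docstring above.
from pydantic import BaseModel

from autogen_ext.models.ollama import OllamaChatCompletionClient


class WeatherReport(BaseModel):
    # Hypothetical structured-output schema used for response_format.
    city: str
    temperature_c: float


client = OllamaChatCompletionClient(
    model="llama3.2",
    host="http://localhost:11434",  # assumed local Ollama default
    # Passed through to the Ollama client; keys follow Ollama's Options.
    options={"temperature": 0.2, "num_ctx": 8192},
    # Responses are parsed into this pydantic model as JSON.
    response_format=WeatherReport,
)
```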