
Commit 8c9961e

peterychang and ekzhu authored
add options to ollama client (microsoft#5805)
Necessary to configure ollama client

## Related issue number

microsoft#5597

Co-authored-by: Eric Zhu <[email protected]>
1 parent dd1ade8 commit 8c9961e

File tree

2 files changed: +7 −6 lines changed


python/packages/autogen-ext/src/autogen_ext/models/ollama/_ollama_client.py

+2 −5

@@ -681,7 +681,6 @@ async def create_stream(
 
         chunk = None
         stop_reason = None
-        maybe_model = None
         content_chunks: List[str] = []
         full_tool_calls: List[FunctionCall] = []
         completion_tokens = 0
@@ -695,7 +694,6 @@ async def create_stream(
 
                 # set the stop_reason for the usage chunk to the prior stop_reason
                 stop_reason = chunk.done_reason if chunk.done and stop_reason is None else stop_reason
-                maybe_model = chunk.model
                 # First try get content
                 if chunk.message.content is not None:
                     content_chunks.append(chunk.message.content)
@@ -732,9 +730,6 @@ async def create_stream(
             except StopAsyncIteration:
                 break
 
-        model = maybe_model or create_args["model"]
-        model = model.replace("gpt-35", "gpt-3.5")  # hack for Azure API
-
         if chunk and chunk.prompt_eval_count:
             prompt_tokens = chunk.prompt_eval_count
         else:
@@ -857,6 +852,7 @@ def model_info(self) -> ModelInfo:
         return self._model_info
 
 
+# TODO: see if response_format can just be a json blob instead of a BaseModel
 class OllamaChatCompletionClient(BaseOllamaChatCompletionClient, Component[BaseOllamaClientConfigurationConfigModel]):
     """Chat completion client for Ollama hosted models.
 
@@ -866,6 +862,7 @@ class OllamaChatCompletionClient(BaseOllamaChatCompletionClient, Component[BaseO
         model (str): Which Ollama model to use.
        host (optional, str): Model host url.
        response_format (optional, pydantic.BaseModel): The format of the response. If provided, the response will be parsed into this format as json.
+       options (optional, Mapping[str, Any] | Options): Additional options to pass to the Ollama client.
        model_info (optional, ModelInfo): The capabilities of the model. **Required if the model is not listed in the ollama model info.**
 
    Note:
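For context, a minimal usage sketch of the new options argument. This is an illustration rather than code from the commit: the model name and option values are placeholders.

from autogen_ext.models.ollama import OllamaChatCompletionClient

# Plain-mapping form of `options`; "llama3.2" and the values are placeholders.
client = OllamaChatCompletionClient(
    model="llama3.2",
    options={"temperature": 0.2, "num_ctx": 4096},
)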

python/packages/autogen-ext/src/autogen_ext/models/ollama/config/__init__.py

+5 −1

@@ -1,12 +1,14 @@
-from typing import Any, Mapping, Optional
+from typing import Any, Mapping, Optional, Union
 
 from autogen_core.models import ModelCapabilities, ModelInfo  # type: ignore
+from ollama import Options
 from pydantic import BaseModel
 from typing_extensions import TypedDict
 
 
 # response_format MUST be a pydantic.BaseModel type or None
 # TODO: check if we can extend response_format to support json and/or dict
+# TODO: extend arguments to all AsyncClient supported args
 class CreateArguments(TypedDict, total=False):
     model: str
     host: Optional[str]
@@ -20,6 +22,7 @@ class BaseOllamaClientConfiguration(CreateArguments, total=False):
     model_capabilities: ModelCapabilities  # type: ignore
     model_info: ModelInfo
     """What functionality the model supports, determined by default from model name but is overriden if value passed."""
+    options: Optional[Union[Mapping[str, Any], Options]]
 
 
 # Pydantic equivalents of the above TypedDicts
@@ -37,3 +40,4 @@ class BaseOllamaClientConfigurationConfigModel(CreateArgumentsConfigModel):
     headers: Mapping[str, str] | None = None
     model_capabilities: ModelCapabilities | None = None  # type: ignore
     model_info: ModelInfo | None = None
+    options: Mapping[str, Any] | Options | None = None
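Since the config now also accepts the typed Options from the ollama package alongside a plain mapping, here is an equivalent sketch under the same placeholder assumptions:

from ollama import Options

from autogen_ext.models.ollama import OllamaChatCompletionClient

client = OllamaChatCompletionClient(
    model="llama3.2",  # placeholder model name
    options=Options(temperature=0.2, num_ctx=4096),
)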
