From 79696b61da847ac3d2275a1edf455cf031ebe563 Mon Sep 17 00:00:00 2001
From: Rohan Mehta
Date: Mon, 14 Apr 2025 17:55:36 -0400
Subject: [PATCH 1/2] Replace referenceable_id with response_id

---
 src/agents/items.py                         |  2 +-
 src/agents/models/openai_chatcompletions.py |  2 +-
 src/agents/models/openai_responses.py       |  2 +-
 src/agents/run.py                           |  2 +-
 tests/fake_model.py                         |  2 +-
 tests/test_items_helpers.py                 | 12 ++++----
 tests/test_openai_chatcompletions.py        |  2 +-
 tests/test_run_step_execution.py            | 20 ++++++-------
 tests/test_run_step_processing.py           | 32 ++++++++++-----------
 9 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/src/agents/items.py b/src/agents/items.py
index c2af0dfc..d72701ab 100644
--- a/src/agents/items.py
+++ b/src/agents/items.py
@@ -166,7 +166,7 @@ class ModelResponse:
     usage: Usage
     """The usage information for the response."""
 
-    referenceable_id: str | None
+    response_id: str | None
     """An ID for the response which can be used to refer to the response in subsequent calls to
     the model. Not supported by all model providers.
     """
diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py
index 6978ee30..b60d5d6a 100644
--- a/src/agents/models/openai_chatcompletions.py
+++ b/src/agents/models/openai_chatcompletions.py
@@ -156,7 +156,7 @@ async def get_response(
         return ModelResponse(
             output=items,
             usage=usage,
-            referenceable_id=None,
+            response_id=None,
         )
 
     async def stream_response(
diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py
index 055ab79b..e509d6f8 100644
--- a/src/agents/models/openai_responses.py
+++ b/src/agents/models/openai_responses.py
@@ -120,7 +120,7 @@ async def get_response(
         return ModelResponse(
             output=response.output,
             usage=usage,
-            referenceable_id=response.id,
+            response_id=response.id,
         )
 
     async def stream_response(
diff --git a/src/agents/run.py b/src/agents/run.py
index 0159822a..93e6490c 100644
--- a/src/agents/run.py
+++ b/src/agents/run.py
@@ -677,7 +677,7 @@ async def _run_single_turn_streamed(
                 final_response = ModelResponse(
                     output=event.response.output,
                     usage=usage,
-                    referenceable_id=event.response.id,
+                    response_id=event.response.id,
                 )
 
             streamed_result._event_queue.put_nowait(RawResponsesStreamEvent(data=event))
diff --git a/tests/fake_model.py b/tests/fake_model.py
index ecbb7583..61fb5951 100644
--- a/tests/fake_model.py
+++ b/tests/fake_model.py
@@ -81,7 +81,7 @@ async def get_response(
         return ModelResponse(
             output=output,
             usage=Usage(),
-            referenceable_id=None,
+            response_id=None,
         )
 
     async def stream_response(
diff --git a/tests/test_items_helpers.py b/tests/test_items_helpers.py
index 90fe6475..5dba21d8 100644
--- a/tests/test_items_helpers.py
+++ b/tests/test_items_helpers.py
@@ -168,7 +168,7 @@ def test_to_input_items_for_message() -> None:
     message = ResponseOutputMessage(
         id="m1", content=[content], role="assistant", status="completed", type="message"
     )
-    resp = ModelResponse(output=[message], usage=Usage(), referenceable_id=None)
+    resp = ModelResponse(output=[message], usage=Usage(), response_id=None)
     input_items = resp.to_input_items()
     assert isinstance(input_items, list) and len(input_items) == 1
     # The dict should contain exactly the primitive values of the message
@@ -193,7 +193,7 @@ def test_to_input_items_for_function_call() -> None:
     tool_call = ResponseFunctionToolCall(
         id="f1", arguments="{}", call_id="c1", name="func", type="function_call"
     )
-    resp = ModelResponse(output=[tool_call], usage=Usage(), referenceable_id=None)
+    resp = ModelResponse(output=[tool_call], usage=Usage(), response_id=None)
     input_items = resp.to_input_items()
     assert isinstance(input_items, list) and len(input_items) == 1
     expected: ResponseFunctionToolCallParam = {
@@ -211,7 +211,7 @@ def test_to_input_items_for_file_search_call() -> None:
     fs_call = ResponseFileSearchToolCall(
         id="fs1", queries=["query"], status="completed", type="file_search_call"
     )
-    resp = ModelResponse(output=[fs_call], usage=Usage(), referenceable_id=None)
+    resp = ModelResponse(output=[fs_call], usage=Usage(), response_id=None)
     input_items = resp.to_input_items()
     assert isinstance(input_items, list) and len(input_items) == 1
     expected: ResponseFileSearchToolCallParam = {
@@ -226,7 +226,7 @@ def test_to_input_items_for_file_search_call() -> None:
 def test_to_input_items_for_web_search_call() -> None:
     """A web search tool call output should produce the same dict as a web search input."""
     ws_call = ResponseFunctionWebSearch(id="w1", status="completed", type="web_search_call")
-    resp = ModelResponse(output=[ws_call], usage=Usage(), referenceable_id=None)
+    resp = ModelResponse(output=[ws_call], usage=Usage(), response_id=None)
     input_items = resp.to_input_items()
     assert isinstance(input_items, list) and len(input_items) == 1
     expected: ResponseFunctionWebSearchParam = {
@@ -248,7 +248,7 @@ def test_to_input_items_for_computer_call_click() -> None:
         pending_safety_checks=[],
         status="completed",
     )
-    resp = ModelResponse(output=[comp_call], usage=Usage(), referenceable_id=None)
+    resp = ModelResponse(output=[comp_call], usage=Usage(), response_id=None)
     input_items = resp.to_input_items()
     assert isinstance(input_items, list) and len(input_items) == 1
     converted_dict = input_items[0]
@@ -268,7 +268,7 @@ def test_to_input_items_for_reasoning() -> None:
     """A reasoning output should produce the same dict as a reasoning input item."""
     rc = Summary(text="why", type="summary_text")
     reasoning = ResponseReasoningItem(id="rid1", summary=[rc], type="reasoning")
-    resp = ModelResponse(output=[reasoning], usage=Usage(), referenceable_id=None)
+    resp = ModelResponse(output=[reasoning], usage=Usage(), response_id=None)
     input_items = resp.to_input_items()
     assert isinstance(input_items, list) and len(input_items) == 1
     converted_dict = input_items[0]
diff --git a/tests/test_openai_chatcompletions.py b/tests/test_openai_chatcompletions.py
index 281d7b41..3608fc57 100644
--- a/tests/test_openai_chatcompletions.py
+++ b/tests/test_openai_chatcompletions.py
@@ -80,7 +80,7 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert resp.usage.input_tokens == 7
     assert resp.usage.output_tokens == 5
     assert resp.usage.total_tokens == 12
-    assert resp.referenceable_id is None
+    assert resp.response_id is None
 
 
 @pytest.mark.allow_call_model_methods
diff --git a/tests/test_run_step_execution.py b/tests/test_run_step_execution.py
index 16c62c84..6ae25fbd 100644
--- a/tests/test_run_step_execution.py
+++ b/tests/test_run_step_execution.py
@@ -43,7 +43,7 @@ async def test_empty_response_is_final_output():
     response = ModelResponse(
         output=[],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = await get_execute_result(agent, response)
 
@@ -59,7 +59,7 @@ async def test_plaintext_agent_no_tool_calls_is_final_output():
     response = ModelResponse(
         output=[get_text_message("hello_world")],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = await get_execute_result(agent, response)
 
@@ -79,7 +79,7 @@ async def test_plaintext_agent_no_tool_calls_multiple_messages_is_final_output()
get_text_message("bye"), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result( agent, @@ -105,7 +105,7 @@ async def test_plaintext_agent_with_tool_call_is_run_again(): response = ModelResponse( output=[get_text_message("hello_world"), get_function_tool_call("test", "")], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result(agent, response) @@ -140,7 +140,7 @@ async def test_multiple_tool_calls(): get_function_tool_call("test_2"), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result(agent, response) @@ -166,7 +166,7 @@ async def test_handoff_output_leads_to_handoff_next_step(): response = ModelResponse( output=[get_text_message("Hello, world!"), get_handoff_tool_call(agent_1)], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result(agent_3, response) @@ -186,7 +186,7 @@ async def test_final_output_without_tool_runs_again(): response = ModelResponse( output=[get_function_tool_call("tool_1")], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result(agent, response) @@ -203,7 +203,7 @@ async def test_final_output_leads_to_final_output_next_step(): get_final_output_message(Foo(bar="123").model_dump_json()), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result(agent, response) @@ -222,7 +222,7 @@ async def test_handoff_and_final_output_leads_to_handoff_next_step(): get_handoff_tool_call(agent_1), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result(agent_3, response) @@ -241,7 +241,7 @@ async def test_multiple_final_output_leads_to_final_output_next_step(): get_final_output_message(Foo(bar="456").model_dump_json()), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result(agent_3, response) diff --git a/tests/test_run_step_processing.py b/tests/test_run_step_processing.py index 2a6634ac..2ea98f06 100644 --- a/tests/test_run_step_processing.py +++ b/tests/test_run_step_processing.py @@ -39,7 +39,7 @@ def test_empty_response(): response = ModelResponse( output=[], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = RunImpl.process_model_response( @@ -58,7 +58,7 @@ def test_no_tool_calls(): response = ModelResponse( output=[get_text_message("Hello, world!")], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = RunImpl.process_model_response( agent=agent, response=response, output_schema=None, handoffs=[], all_tools=[] @@ -76,7 +76,7 @@ async def test_single_tool_call(): get_function_tool_call("test", ""), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = RunImpl.process_model_response( agent=agent, @@ -102,7 +102,7 @@ async def test_missing_tool_call_raises_error(): get_function_tool_call("missing", ""), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) with pytest.raises(ModelBehaviorError): @@ -132,7 +132,7 @@ async def test_multiple_tool_calls(): get_function_tool_call("test_2", "xyz"), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = RunImpl.process_model_response( @@ -162,7 +162,7 @@ async def test_handoffs_parsed_correctly(): response = ModelResponse( output=[get_text_message("Hello, world!")], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = RunImpl.process_model_response( agent=agent_3, @@ -176,7 +176,7 @@ async def 
@@ -176,7 +176,7 @@ async def test_handoffs_parsed_correctly():
     response = ModelResponse(
         output=[get_text_message("Hello, world!"), get_handoff_tool_call(agent_1)],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = RunImpl.process_model_response(
         agent=agent_3,
@@ -205,7 +205,7 @@ async def test_missing_handoff_fails():
     response = ModelResponse(
         output=[get_text_message("Hello, world!"), get_handoff_tool_call(agent_2)],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     with pytest.raises(ModelBehaviorError):
         RunImpl.process_model_response(
@@ -229,7 +229,7 @@ async def test_multiple_handoffs_doesnt_error():
             get_handoff_tool_call(agent_2),
         ],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = RunImpl.process_model_response(
         agent=agent_3,
@@ -254,7 +254,7 @@ async def test_final_output_parsed_correctly():
         get_final_output_message(Foo(bar="123").model_dump_json()),
         ],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
 
     RunImpl.process_model_response(
@@ -281,7 +281,7 @@ async def test_file_search_tool_call_parsed_correctly():
     response = ModelResponse(
         output=[get_text_message("hello"), file_search_call],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = RunImpl.process_model_response(
         agent=agent,
@@ -306,7 +306,7 @@ async def test_function_web_search_tool_call_parsed_correctly():
     response = ModelResponse(
         output=[get_text_message("hello"), web_search_call],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = RunImpl.process_model_response(
         agent=agent,
@@ -333,7 +333,7 @@ async def test_reasoning_item_parsed_correctly():
     response = ModelResponse(
         output=[reasoning],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = RunImpl.process_model_response(
         agent=Agent(name="test"),
@@ -401,7 +401,7 @@ async def test_computer_tool_call_without_computer_tool_raises_error():
     response = ModelResponse(
         output=[computer_call],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     with pytest.raises(ModelBehaviorError):
         RunImpl.process_model_response(
@@ -430,7 +430,7 @@ async def test_computer_tool_call_with_computer_tool_parsed_correctly():
     response = ModelResponse(
         output=[computer_call],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = RunImpl.process_model_response(
         agent=agent,
@@ -460,7 +460,7 @@ async def test_tool_and_handoff_parsed_correctly():
         get_handoff_tool_call(agent_1),
         ],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
 
     result = RunImpl.process_model_response(

From d6226059df2c4a3056fd9f33f3836286f0ded1c1 Mon Sep 17 00:00:00 2001
From: Rohan Mehta
Date: Mon, 14 Apr 2025 17:55:42 -0400
Subject: [PATCH 2/2] Previous response id

---
 src/agents/items.py                         |   2 +
 src/agents/models/interface.py              |   8 ++
 src/agents/models/openai_chatcompletions.py |   3 +
 src/agents/models/openai_responses.py       |   9 ++
 src/agents/result.py                        |   8 ++
 src/agents/run.py                           |  22 +++-
 tests/fake_model.py                         |   4 +
 tests/test_openai_chatcompletions.py        |   3 +
 tests/test_openai_chatcompletions_stream.py |   3 +
 tests/test_responses_tracing.py             | 108 +++++++++++++++++---
 tests/voice/test_workflow.py                |   4 +
 11 files changed, 161 insertions(+), 13 deletions(-)

diff --git a/src/agents/items.py b/src/agents/items.py
index d72701ab..8fb2b52a 100644
--- a/src/agents/items.py
+++ b/src/agents/items.py
@@ -169,6 +169,8 @@ class ModelResponse:
     response_id: str | None
     """An ID for the response which can be used to refer to the response in subsequent calls to
     the model. Not supported by all model providers.
+    If using OpenAI models via the Responses API, this ID can be passed to `Runner.run` as
+    `previous_response_id` to continue from this response in the next turn.
     """
 
     def to_input_items(self) -> list[TResponseInputItem]:
diff --git a/src/agents/models/interface.py b/src/agents/models/interface.py
index e9a8700c..bcf2c1a6 100644
--- a/src/agents/models/interface.py
+++ b/src/agents/models/interface.py
@@ -44,6 +44,8 @@ async def get_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         """Get a response from the model.
 
@@ -55,6 +57,8 @@ async def get_response(
             output_schema: The output schema to use.
             handoffs: The handoffs available to the model.
             tracing: Tracing configuration.
+            previous_response_id: The ID of the previous response. Generally not used by the model,
+                except for the OpenAI Responses API.
 
         Returns:
             The full model response.
@@ -71,6 +75,8 @@ def stream_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         """Stream a response from the model.
 
@@ -82,6 +88,8 @@ def stream_response(
             output_schema: The output schema to use.
             handoffs: The handoffs available to the model.
             tracing: Tracing configuration.
+            previous_response_id: The ID of the previous response. Generally not used by the model,
+                except for the OpenAI Responses API.
 
         Returns:
             An iterator of response stream events, in OpenAI Responses format.
diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py
index b60d5d6a..c12fe68a 100644
--- a/src/agents/models/openai_chatcompletions.py
+++ b/src/agents/models/openai_chatcompletions.py
@@ -108,6 +108,7 @@ async def get_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         with generation_span(
             model=str(self.model),
@@ -168,6 +169,8 @@ async def stream_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         """
         Yields a partial message as it is generated, as well as the usage information.
diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py
index e509d6f8..ab4617d4 100644
--- a/src/agents/models/openai_responses.py
+++ b/src/agents/models/openai_responses.py
@@ -69,6 +69,7 @@ async def get_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         with response_span(disabled=tracing.is_disabled()) as span_response:
             try:
@@ -79,6 +80,7 @@ async def get_response(
                     tools,
                     output_schema,
                     handoffs,
+                    previous_response_id,
                     stream=False,
                 )
 
@@ -132,6 +134,7 @@ async def stream_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        previous_response_id: str | None,
     ) -> AsyncIterator[ResponseStreamEvent]:
         """
         Yields a partial message as it is generated, as well as the usage information.
@@ -145,6 +148,7 @@ async def stream_response(
                     tools,
                     output_schema,
                     handoffs,
+                    previous_response_id,
                     stream=True,
                 )
 
@@ -180,6 +184,7 @@ async def _fetch_response(
         tools: list[Tool],
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
+        previous_response_id: str | None,
         stream: Literal[True],
     ) -> AsyncStream[ResponseStreamEvent]: ...
@@ -192,6 +197,7 @@ async def _fetch_response(
         tools: list[Tool],
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
+        previous_response_id: str | None,
         stream: Literal[False],
     ) -> Response: ...
 
@@ -203,6 +209,7 @@ async def _fetch_response(
         tools: list[Tool],
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
+        previous_response_id: str | None,
         stream: Literal[True] | Literal[False] = False,
     ) -> Response | AsyncStream[ResponseStreamEvent]:
         list_input = ItemHelpers.input_to_new_input_list(input)
@@ -229,9 +236,11 @@ async def _fetch_response(
             f"Stream: {stream}\n"
             f"Tool choice: {tool_choice}\n"
             f"Response format: {response_format}\n"
+            f"Previous response id: {previous_response_id}\n"
         )
 
         return await self._client.responses.create(
+            previous_response_id=self._non_null_or_not_given(previous_response_id),
             instructions=self._non_null_or_not_given(system_instructions),
             model=self.model,
             input=list_input,
diff --git a/src/agents/result.py b/src/agents/result.py
index 40a64806..a2a6cc4a 100644
--- a/src/agents/result.py
+++ b/src/agents/result.py
@@ -80,6 +80,14 @@ def to_input_list(self) -> list[TResponseInputItem]:
 
         return original_items + new_items
 
+    @property
+    def last_response_id(self) -> str | None:
+        """Convenience property to get the response ID of the last model response."""
+        if not self.raw_responses:
+            return None
+
+        return self.raw_responses[-1].response_id
+
 
 @dataclass
 class RunResult(RunResultBase):
diff --git a/src/agents/run.py b/src/agents/run.py
index 93e6490c..e2b0dbce 100644
--- a/src/agents/run.py
+++ b/src/agents/run.py
@@ -117,6 +117,7 @@ async def run(
         max_turns: int = DEFAULT_MAX_TURNS,
         hooks: RunHooks[TContext] | None = None,
         run_config: RunConfig | None = None,
+        previous_response_id: str | None = None,
     ) -> RunResult:
         """Run a workflow starting at the given agent. The agent will run in a loop until a final
         output is generated. The loop runs like so:
@@ -141,6 +142,8 @@ async def run(
                 AI invocation (including any tool calls that might occur).
             hooks: An object that receives callbacks on various lifecycle events.
             run_config: Global settings for the entire agent run.
+            previous_response_id: The ID of the previous response. If using OpenAI models via the
+                Responses API, this allows you to skip passing input from the previous turn.
 
         Returns:
             A run result containing all the inputs, guardrail results and the output of the last
@@ -230,6 +233,7 @@ async def run(
                             run_config=run_config,
                             should_run_agent_start_hooks=should_run_agent_start_hooks,
                             tool_use_tracker=tool_use_tracker,
+                            previous_response_id=previous_response_id,
                         ),
                     )
                 else:
@@ -243,6 +247,7 @@ async def run(
                         run_config=run_config,
                         should_run_agent_start_hooks=should_run_agent_start_hooks,
                         tool_use_tracker=tool_use_tracker,
+                        previous_response_id=previous_response_id,
                     )
                 should_run_agent_start_hooks = False
 
@@ -291,6 +296,7 @@ def run_sync(
         max_turns: int = DEFAULT_MAX_TURNS,
         hooks: RunHooks[TContext] | None = None,
         run_config: RunConfig | None = None,
+        previous_response_id: str | None = None,
     ) -> RunResult:
         """Run a workflow synchronously, starting at the given agent. Note that this just wraps the
         `run` method, so it will not work if there's already an event loop (e.g. inside an async
@@ -319,6 +325,8 @@ def run_sync(
                 AI invocation (including any tool calls that might occur).
             hooks: An object that receives callbacks on various lifecycle events.
             run_config: Global settings for the entire agent run.
+            previous_response_id: The ID of the previous response. If using OpenAI models via the
+                Responses API, this allows you to skip passing input from the previous turn.
 
         Returns:
             A run result containing all the inputs, guardrail results and the output of the last
@@ -332,6 +340,7 @@ def run_sync(
                 max_turns=max_turns,
                 hooks=hooks,
                 run_config=run_config,
+                previous_response_id=previous_response_id,
             )
         )
 
@@ -344,6 +353,7 @@ def run_streamed(
         max_turns: int = DEFAULT_MAX_TURNS,
         hooks: RunHooks[TContext] | None = None,
         run_config: RunConfig | None = None,
+        previous_response_id: str | None = None,
     ) -> RunResultStreaming:
         """Run a workflow starting at the given agent in streaming mode. The returned result object
         contains a method you can use to stream semantic events as they are generated.
@@ -370,7 +380,8 @@ def run_streamed(
                 AI invocation (including any tool calls that might occur).
             hooks: An object that receives callbacks on various lifecycle events.
             run_config: Global settings for the entire agent run.
-
+            previous_response_id: The ID of the previous response. If using OpenAI models via the
+                Responses API, this allows you to skip passing input from the previous turn.
         Returns:
             A result object that contains data about the run, as well as a method to stream events.
         """
@@ -428,6 +439,7 @@ def run_streamed(
                 hooks=hooks,
                 context_wrapper=context_wrapper,
                 run_config=run_config,
+                previous_response_id=previous_response_id,
             )
         )
         return streamed_result
@@ -485,6 +497,7 @@ async def _run_streamed_impl(
         hooks: RunHooks[TContext],
         context_wrapper: RunContextWrapper[TContext],
         run_config: RunConfig,
+        previous_response_id: str | None,
     ):
         current_span: Span[AgentSpanData] | None = None
         current_agent = starting_agent
@@ -554,6 +567,7 @@ async def _run_streamed_impl(
                         should_run_agent_start_hooks,
                         tool_use_tracker,
                         all_tools,
+                        previous_response_id,
                     )
                     should_run_agent_start_hooks = False
 
@@ -623,6 +637,7 @@ async def _run_single_turn_streamed(
         should_run_agent_start_hooks: bool,
         tool_use_tracker: AgentToolUseTracker,
         all_tools: list[Tool],
+        previous_response_id: str | None,
     ) -> SingleStepResult:
         if should_run_agent_start_hooks:
             await asyncio.gather(
@@ -662,6 +677,7 @@ async def _run_single_turn_streamed(
             get_model_tracing_impl(
                 run_config.tracing_disabled, run_config.trace_include_sensitive_data
             ),
+            previous_response_id=previous_response_id,
         ):
             if isinstance(event, ResponseCompletedEvent):
                 usage = (
@@ -717,6 +733,7 @@ async def _run_single_turn(
         run_config: RunConfig,
         should_run_agent_start_hooks: bool,
         tool_use_tracker: AgentToolUseTracker,
+        previous_response_id: str | None,
     ) -> SingleStepResult:
         # Ensure we run the hooks before anything else
         if should_run_agent_start_hooks:
@@ -746,6 +763,7 @@ async def _run_single_turn(
             context_wrapper,
             run_config,
             tool_use_tracker,
+            previous_response_id,
         )
 
         return await cls._get_single_step_result_from_response(
@@ -888,6 +906,7 @@ async def _get_new_response(
         context_wrapper: RunContextWrapper[TContext],
         run_config: RunConfig,
         tool_use_tracker: AgentToolUseTracker,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         model = cls._get_model(agent, run_config)
         model_settings = agent.model_settings.resolve(run_config.model_settings)
@@ -903,6 +922,7 @@ async def _get_new_response(
             tracing=get_model_tracing_impl(
                 run_config.tracing_disabled, run_config.trace_include_sensitive_data
             ),
+            previous_response_id=previous_response_id,
         )
 
         context_wrapper.usage.add(new_response.usage)
diff --git a/tests/fake_model.py b/tests/fake_model.py
index 61fb5951..203479d0 100644
--- a/tests/fake_model.py
+++ b/tests/fake_model.py
@@ -54,6 +54,8 @@ async def get_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         self.last_turn_args = {
             "system_instructions": system_instructions,
@@ -93,6 +95,8 @@ async def stream_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         with generation_span(disabled=not self.tracing_enabled) as span:
             output = self.get_next_output()
diff --git a/tests/test_openai_chatcompletions.py b/tests/test_openai_chatcompletions.py
index 3608fc57..92d65fda 100644
--- a/tests/test_openai_chatcompletions.py
+++ b/tests/test_openai_chatcompletions.py
@@ -67,6 +67,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     )
     # Should have produced exactly one output message with one text part
     assert isinstance(resp, ModelResponse)
@@ -115,6 +116,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     )
     assert len(resp.output) == 1
     assert isinstance(resp.output[0], ResponseOutputMessage)
@@ -164,6 +166,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     )
     # Expect a message item followed by a function tool call item.
     assert len(resp.output) == 2
diff --git a/tests/test_openai_chatcompletions_stream.py b/tests/test_openai_chatcompletions_stream.py
index 7add92a6..b82f2430 100644
--- a/tests/test_openai_chatcompletions_stream.py
+++ b/tests/test_openai_chatcompletions_stream.py
@@ -79,6 +79,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     ):
         output_events.append(event)
     # We expect a response.created, then a response.output_item.added, content part added,
@@ -168,6 +169,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     ):
         output_events.append(event)
     # Expect sequence similar to text: created, output_item.added, content part added,
@@ -255,6 +257,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     ):
         output_events.append(event)
     # Sequence should be: response.created, then after loop we expect function call-related events:
diff --git a/tests/test_responses_tracing.py b/tests/test_responses_tracing.py
index 40bdfafb..0bc97a95 100644
--- a/tests/test_responses_tracing.py
+++ b/tests/test_responses_tracing.py
@@ -44,7 +44,14 @@ async def test_get_response_creates_trace(monkeypatch):
 
     # Mock _fetch_response to return a dummy response with a known id
     async def dummy_fetch_response(
-        system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+        system_instructions,
+        input,
+        model_settings,
+        tools,
+        output_schema,
+        handoffs,
+        prev_response_id,
+        stream,
     ):
         return DummyResponse()
 
@@ -52,7 +59,14 @@ async def dummy_fetch_response(
 
     # Call get_response
     await model.get_response(
-        "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED
+        "instr",
+        "input",
+        ModelSettings(),
+        [],
+        None,
+        [],
+        ModelTracing.ENABLED,
+        previous_response_id=None,
     )
 
     assert fetch_normalized_spans() == snapshot(
@@ -74,7 +88,14 @@ async def test_non_data_tracing_doesnt_set_response_id(monkeypatch):
 
     # Mock _fetch_response to return a dummy response with a known id
     async def dummy_fetch_response(
-        system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+        system_instructions,
+        input,
+        model_settings,
+        tools,
+        output_schema,
+        handoffs,
+        prev_response_id,
+        stream,
     ):
         return DummyResponse()
 
@@ -82,7 +103,14 @@ async def dummy_fetch_response(
 
     # Call get_response
    await model.get_response(
-        "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED_WITHOUT_DATA
+        "instr",
+        "input",
+        ModelSettings(),
+        [],
+        None,
+        [],
+        ModelTracing.ENABLED_WITHOUT_DATA,
+        previous_response_id=None,
     )
 
     assert fetch_normalized_spans() == snapshot(
@@ -102,7 +130,14 @@ async def test_disable_tracing_does_not_create_span(monkeypatch):
 
     # Mock _fetch_response to return a dummy response with a known id
     async def dummy_fetch_response(
-        system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+        system_instructions,
+        input,
+        model_settings,
+        tools,
+        output_schema,
+        handoffs,
+        prev_response_id,
+        stream,
     ):
         return DummyResponse()
 
@@ -110,7 +145,14 @@ async def dummy_fetch_response(
 
     # Call get_response
     await model.get_response(
-        "instr", "input", ModelSettings(), [], None, [], ModelTracing.DISABLED
+        "instr",
+        "input",
+        ModelSettings(),
+        [],
+        None,
+        [],
+        ModelTracing.DISABLED,
+        previous_response_id=None,
     )
 
     assert fetch_normalized_spans() == snapshot([{"workflow_name": "test"}])
@@ -127,7 +169,14 @@ async def test_stream_response_creates_trace(monkeypatch):
 
     # Define a dummy fetch function that returns an async stream with a dummy response
     async def dummy_fetch_response(
-        system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+        system_instructions,
+        input,
+        model_settings,
+        tools,
+        output_schema,
+        handoffs,
+        prev_response_id,
+        stream,
     ):
         class DummyStream:
             async def __aiter__(self):
@@ -142,7 +191,14 @@ async def __aiter__(self):
 
     # Consume the stream to trigger processing of the final response
     async for _ in model.stream_response(
-        "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED
+        "instr",
+        "input",
+        ModelSettings(),
+        [],
+        None,
+        [],
+        ModelTracing.ENABLED,
+        previous_response_id=None,
     ):
         pass
 
@@ -165,7 +221,14 @@ async def test_stream_non_data_tracing_doesnt_set_response_id(monkeypatch):
 
     # Define a dummy fetch function that returns an async stream with a dummy response
     async def dummy_fetch_response(
-        system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+        system_instructions,
+        input,
+        model_settings,
+        tools,
+        output_schema,
+        handoffs,
+        prev_response_id,
+        stream,
     ):
         class DummyStream:
             async def __aiter__(self):
@@ -180,7 +243,14 @@ async def __aiter__(self):
 
     # Consume the stream to trigger processing of the final response
     async for _ in model.stream_response(
-        "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED_WITHOUT_DATA
+        "instr",
+        "input",
+        ModelSettings(),
+        [],
+        None,
+        [],
+        ModelTracing.ENABLED_WITHOUT_DATA,
+        previous_response_id=None,
     ):
         pass
 
@@ -202,7 +272,14 @@ async def test_stream_disabled_tracing_doesnt_create_span(monkeypatch):
 
     # Define a dummy fetch function that returns an async stream with a dummy response
     async def dummy_fetch_response(
-        system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+        system_instructions,
+        input,
+        model_settings,
+        tools,
+        output_schema,
+        handoffs,
+        prev_response_id,
+        stream,
     ):
         class DummyStream:
             async def __aiter__(self):
@@ -217,7 +294,14 @@ async def __aiter__(self):
 
     # Consume the stream to trigger processing of the final response
     async for _ in model.stream_response(
-        "instr", "input", ModelSettings(), [], None, [], ModelTracing.DISABLED
+        "instr",
+        "input",
+        ModelSettings(),
+        [],
+        None,
+        [],
+        ModelTracing.DISABLED,
+        previous_response_id=None,
    ):
         pass
 
diff --git a/tests/voice/test_workflow.py b/tests/voice/test_workflow.py
index 3f18c049..72a3370d 100644
--- a/tests/voice/test_workflow.py
+++ b/tests/voice/test_workflow.py
@@ -51,6 +51,8 @@ async def get_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         raise NotImplementedError("Not implemented")
 
@@ -63,6 +65,8 @@ async def stream_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         output = self.get_next_output()
         for item in output:
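
Usage sketch: a minimal example of how the two additions from PATCH 2/2 — the
`previous_response_id` argument on `Runner.run` and the `last_response_id`
property on the run result — are meant to compose. It assumes an OpenAI
Responses API model; the agent name, instructions, and prompts are made up for
illustration.

    import asyncio

    from agents import Agent, Runner


    async def main() -> None:
        agent = Agent(name="Assistant", instructions="Reply very concisely.")

        # Turn 1: a normal run. last_response_id surfaces
        # ModelResponse.response_id from the final raw response; it is None
        # for providers that don't support it (e.g. Chat Completions).
        first = await Runner.run(agent, "What is the Golden Gate Bridge?")
        print(first.final_output)

        # Turn 2: instead of replaying first.to_input_list() plus the new
        # message, pass previous_response_id and send only the new input;
        # the Responses API resumes the stored conversation server-side.
        second = await Runner.run(
            agent,
            "How long is it?",
            previous_response_id=first.last_response_id,
        )
        print(second.final_output)


    if __name__ == "__main__":
        asyncio.run(main())

For non-Responses backends, callers should keep the existing
`to_input_list()` pattern, since `last_response_id` will be None there.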