From 79696b61da847ac3d2275a1edf455cf031ebe563 Mon Sep 17 00:00:00 2001
From: Rohan Mehta
Date: Mon, 14 Apr 2025 17:55:36 -0400
Subject: [PATCH 1/2] Replace referenceable_id with response_id

---
 src/agents/items.py                         |  2 +-
 src/agents/models/openai_chatcompletions.py |  2 +-
 src/agents/models/openai_responses.py       |  2 +-
 src/agents/run.py                           |  2 +-
 tests/fake_model.py                         |  2 +-
 tests/test_items_helpers.py                 | 12 ++++----
 tests/test_openai_chatcompletions.py        |  2 +-
 tests/test_run_step_execution.py            | 20 ++++++-------
 tests/test_run_step_processing.py           | 32 ++++++++++-----------
 9 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/src/agents/items.py b/src/agents/items.py
index c2af0dfc..d72701ab 100644
--- a/src/agents/items.py
+++ b/src/agents/items.py
@@ -166,7 +166,7 @@ class ModelResponse:
     usage: Usage
     """The usage information for the response."""
 
-    referenceable_id: str | None
+    response_id: str | None
     """An ID for the response which can be used to refer to the response in subsequent calls to
     the model. Not supported by all model providers.
     """
diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py
index 6978ee30..b60d5d6a 100644
--- a/src/agents/models/openai_chatcompletions.py
+++ b/src/agents/models/openai_chatcompletions.py
@@ -156,7 +156,7 @@ async def get_response(
         return ModelResponse(
             output=items,
             usage=usage,
-            referenceable_id=None,
+            response_id=None,
         )
 
     async def stream_response(
diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py
index 055ab79b..e509d6f8 100644
--- a/src/agents/models/openai_responses.py
+++ b/src/agents/models/openai_responses.py
@@ -120,7 +120,7 @@ async def get_response(
         return ModelResponse(
             output=response.output,
             usage=usage,
-            referenceable_id=response.id,
+            response_id=response.id,
         )
 
     async def stream_response(
diff --git a/src/agents/run.py b/src/agents/run.py
index 0159822a..93e6490c 100644
--- a/src/agents/run.py
+++ b/src/agents/run.py
@@ -677,7 +677,7 @@ async def _run_single_turn_streamed(
                 final_response = ModelResponse(
                     output=event.response.output,
                     usage=usage,
-                    referenceable_id=event.response.id,
+                    response_id=event.response.id,
                 )
 
             streamed_result._event_queue.put_nowait(RawResponsesStreamEvent(data=event))
diff --git a/tests/fake_model.py b/tests/fake_model.py
index ecbb7583..61fb5951 100644
--- a/tests/fake_model.py
+++ b/tests/fake_model.py
@@ -81,7 +81,7 @@ async def get_response(
         return ModelResponse(
             output=output,
             usage=Usage(),
-            referenceable_id=None,
+            response_id=None,
         )
 
     async def stream_response(
diff --git a/tests/test_items_helpers.py b/tests/test_items_helpers.py
index 90fe6475..5dba21d8 100644
--- a/tests/test_items_helpers.py
+++ b/tests/test_items_helpers.py
@@ -168,7 +168,7 @@ def test_to_input_items_for_message() -> None:
     message = ResponseOutputMessage(
         id="m1", content=[content], role="assistant", status="completed", type="message"
     )
-    resp = ModelResponse(output=[message], usage=Usage(), referenceable_id=None)
+    resp = ModelResponse(output=[message], usage=Usage(), response_id=None)
     input_items = resp.to_input_items()
     assert isinstance(input_items, list) and len(input_items) == 1
     # The dict should contain exactly the primitive values of the message
@@ -193,7 +193,7 @@ def test_to_input_items_for_function_call() -> None:
     tool_call = ResponseFunctionToolCall(
         id="f1", arguments="{}", call_id="c1", name="func", type="function_call"
     )
-    resp = ModelResponse(output=[tool_call], usage=Usage(), referenceable_id=None)
+    resp = ModelResponse(output=[tool_call], usage=Usage(), response_id=None)
     input_items = resp.to_input_items()
     assert isinstance(input_items, list) and len(input_items) == 1
     expected: ResponseFunctionToolCallParam = {
@@ -211,7 +211,7 @@ def test_to_input_items_for_file_search_call() -> None:
     fs_call = ResponseFileSearchToolCall(
         id="fs1", queries=["query"], status="completed", type="file_search_call"
     )
-    resp = ModelResponse(output=[fs_call], usage=Usage(), referenceable_id=None)
+    resp = ModelResponse(output=[fs_call], usage=Usage(), response_id=None)
     input_items = resp.to_input_items()
     assert isinstance(input_items, list) and len(input_items) == 1
     expected: ResponseFileSearchToolCallParam = {
@@ -226,7 +226,7 @@ def test_to_input_items_for_file_search_call() -> None:
 def test_to_input_items_for_web_search_call() -> None:
     """A web search tool call output should produce the same dict as a web search input."""
     ws_call = ResponseFunctionWebSearch(id="w1", status="completed", type="web_search_call")
-    resp = ModelResponse(output=[ws_call], usage=Usage(), referenceable_id=None)
+    resp = ModelResponse(output=[ws_call], usage=Usage(), response_id=None)
     input_items = resp.to_input_items()
     assert isinstance(input_items, list) and len(input_items) == 1
     expected: ResponseFunctionWebSearchParam = {
@@ -248,7 +248,7 @@ def test_to_input_items_for_computer_call_click() -> None:
         pending_safety_checks=[],
         status="completed",
     )
-    resp = ModelResponse(output=[comp_call], usage=Usage(), referenceable_id=None)
+    resp = ModelResponse(output=[comp_call], usage=Usage(), response_id=None)
     input_items = resp.to_input_items()
     assert isinstance(input_items, list) and len(input_items) == 1
     converted_dict = input_items[0]
@@ -268,7 +268,7 @@ def test_to_input_items_for_reasoning() -> None:
     """A reasoning output should produce the same dict as a reasoning input item."""
     rc = Summary(text="why", type="summary_text")
     reasoning = ResponseReasoningItem(id="rid1", summary=[rc], type="reasoning")
-    resp = ModelResponse(output=[reasoning], usage=Usage(), referenceable_id=None)
+    resp = ModelResponse(output=[reasoning], usage=Usage(), response_id=None)
     input_items = resp.to_input_items()
     assert isinstance(input_items, list) and len(input_items) == 1
     converted_dict = input_items[0]
diff --git a/tests/test_openai_chatcompletions.py b/tests/test_openai_chatcompletions.py
index 281d7b41..3608fc57 100644
--- a/tests/test_openai_chatcompletions.py
+++ b/tests/test_openai_chatcompletions.py
@@ -80,7 +80,7 @@ async def patched_fetch_response(self, *args, **kwargs):
     assert resp.usage.input_tokens == 7
     assert resp.usage.output_tokens == 5
     assert resp.usage.total_tokens == 12
-    assert resp.referenceable_id is None
+    assert resp.response_id is None
 
 
 @pytest.mark.allow_call_model_methods
diff --git a/tests/test_run_step_execution.py b/tests/test_run_step_execution.py
index 16c62c84..6ae25fbd 100644
--- a/tests/test_run_step_execution.py
+++ b/tests/test_run_step_execution.py
@@ -43,7 +43,7 @@ async def test_empty_response_is_final_output():
     response = ModelResponse(
         output=[],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = await get_execute_result(agent, response)
 
@@ -59,7 +59,7 @@ async def test_plaintext_agent_no_tool_calls_is_final_output():
     response = ModelResponse(
         output=[get_text_message("hello_world")],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = await get_execute_result(agent, response)
 
@@ -79,7 +79,7 @@ async def test_plaintext_agent_no_tool_calls_multiple_messages_is_final_output()
get_text_message("bye"), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result( agent, @@ -105,7 +105,7 @@ async def test_plaintext_agent_with_tool_call_is_run_again(): response = ModelResponse( output=[get_text_message("hello_world"), get_function_tool_call("test", "")], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result(agent, response) @@ -140,7 +140,7 @@ async def test_multiple_tool_calls(): get_function_tool_call("test_2"), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result(agent, response) @@ -166,7 +166,7 @@ async def test_handoff_output_leads_to_handoff_next_step(): response = ModelResponse( output=[get_text_message("Hello, world!"), get_handoff_tool_call(agent_1)], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result(agent_3, response) @@ -186,7 +186,7 @@ async def test_final_output_without_tool_runs_again(): response = ModelResponse( output=[get_function_tool_call("tool_1")], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result(agent, response) @@ -203,7 +203,7 @@ async def test_final_output_leads_to_final_output_next_step(): get_final_output_message(Foo(bar="123").model_dump_json()), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result(agent, response) @@ -222,7 +222,7 @@ async def test_handoff_and_final_output_leads_to_handoff_next_step(): get_handoff_tool_call(agent_1), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result(agent_3, response) @@ -241,7 +241,7 @@ async def test_multiple_final_output_leads_to_final_output_next_step(): get_final_output_message(Foo(bar="456").model_dump_json()), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = await get_execute_result(agent_3, response) diff --git a/tests/test_run_step_processing.py b/tests/test_run_step_processing.py index 2a6634ac..2ea98f06 100644 --- a/tests/test_run_step_processing.py +++ b/tests/test_run_step_processing.py @@ -39,7 +39,7 @@ def test_empty_response(): response = ModelResponse( output=[], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = RunImpl.process_model_response( @@ -58,7 +58,7 @@ def test_no_tool_calls(): response = ModelResponse( output=[get_text_message("Hello, world!")], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = RunImpl.process_model_response( agent=agent, response=response, output_schema=None, handoffs=[], all_tools=[] @@ -76,7 +76,7 @@ async def test_single_tool_call(): get_function_tool_call("test", ""), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = RunImpl.process_model_response( agent=agent, @@ -102,7 +102,7 @@ async def test_missing_tool_call_raises_error(): get_function_tool_call("missing", ""), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) with pytest.raises(ModelBehaviorError): @@ -132,7 +132,7 @@ async def test_multiple_tool_calls(): get_function_tool_call("test_2", "xyz"), ], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = RunImpl.process_model_response( @@ -162,7 +162,7 @@ async def test_handoffs_parsed_correctly(): response = ModelResponse( output=[get_text_message("Hello, world!")], usage=Usage(), - referenceable_id=None, + response_id=None, ) result = RunImpl.process_model_response( agent=agent_3, @@ -176,7 +176,7 @@ async def 
@@ -176,7 +176,7 @@ async def test_handoffs_parsed_correctly():
     response = ModelResponse(
         output=[get_text_message("Hello, world!"), get_handoff_tool_call(agent_1)],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = RunImpl.process_model_response(
         agent=agent_3,
@@ -205,7 +205,7 @@ async def test_missing_handoff_fails():
     response = ModelResponse(
         output=[get_text_message("Hello, world!"), get_handoff_tool_call(agent_2)],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     with pytest.raises(ModelBehaviorError):
         RunImpl.process_model_response(
@@ -229,7 +229,7 @@ async def test_multiple_handoffs_doesnt_error():
             get_handoff_tool_call(agent_2),
         ],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = RunImpl.process_model_response(
         agent=agent_3,
@@ -254,7 +254,7 @@ async def test_final_output_parsed_correctly():
         get_final_output_message(Foo(bar="123").model_dump_json()),
         ],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
 
     RunImpl.process_model_response(
@@ -281,7 +281,7 @@ async def test_file_search_tool_call_parsed_correctly():
     response = ModelResponse(
         output=[get_text_message("hello"), file_search_call],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = RunImpl.process_model_response(
         agent=agent,
@@ -306,7 +306,7 @@ async def test_function_web_search_tool_call_parsed_correctly():
     response = ModelResponse(
         output=[get_text_message("hello"), web_search_call],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = RunImpl.process_model_response(
         agent=agent,
@@ -333,7 +333,7 @@ async def test_reasoning_item_parsed_correctly():
     response = ModelResponse(
         output=[reasoning],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = RunImpl.process_model_response(
         agent=Agent(name="test"),
@@ -401,7 +401,7 @@ async def test_computer_tool_call_without_computer_tool_raises_error():
     response = ModelResponse(
         output=[computer_call],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     with pytest.raises(ModelBehaviorError):
         RunImpl.process_model_response(
@@ -430,7 +430,7 @@ async def test_computer_tool_call_with_computer_tool_parsed_correctly():
     response = ModelResponse(
         output=[computer_call],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
     result = RunImpl.process_model_response(
         agent=agent,
@@ -460,7 +460,7 @@ async def test_tool_and_handoff_parsed_correctly():
         get_handoff_tool_call(agent_1),
         ],
         usage=Usage(),
-        referenceable_id=None,
+        response_id=None,
     )
 
     result = RunImpl.process_model_response(

From d6226059df2c4a3056fd9f33f3836286f0ded1c1 Mon Sep 17 00:00:00 2001
From: Rohan Mehta
Date: Mon, 14 Apr 2025 17:55:42 -0400
Subject: [PATCH 2/2] Previous response id

---
 src/agents/items.py                         |   2 +
 src/agents/models/interface.py              |   8 ++
 src/agents/models/openai_chatcompletions.py |   3 +
 src/agents/models/openai_responses.py       |   9 ++
 src/agents/result.py                        |   8 ++
 src/agents/run.py                           |  22 +++-
 tests/fake_model.py                         |   4 +
 tests/test_openai_chatcompletions.py        |   3 +
 tests/test_openai_chatcompletions_stream.py |   3 +
 tests/test_responses_tracing.py             | 108 +++++++++++++++++---
 tests/voice/test_workflow.py                |   4 +
 11 files changed, 161 insertions(+), 13 deletions(-)

diff --git a/src/agents/items.py b/src/agents/items.py
index d72701ab..8fb2b52a 100644
--- a/src/agents/items.py
+++ b/src/agents/items.py
@@ -169,6 +169,8 @@ class ModelResponse:
     response_id: str | None
     """An ID for the response which can be used to refer to the response in subsequent calls to
     the model. Not supported by all model providers.
+    If using OpenAI models via the Responses API, this ID can be passed to `Runner.run` as
+    `previous_response_id` to continue from this response in the next turn.
     """
 
     def to_input_items(self) -> list[TResponseInputItem]:
diff --git a/src/agents/models/interface.py b/src/agents/models/interface.py
index e9a8700c..bcf2c1a6 100644
--- a/src/agents/models/interface.py
+++ b/src/agents/models/interface.py
@@ -44,6 +44,8 @@ async def get_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         """Get a response from the model.
 
@@ -55,6 +57,8 @@ async def get_response(
             output_schema: The output schema to use.
             handoffs: The handoffs available to the model.
             tracing: Tracing configuration.
+            previous_response_id: The ID of the previous response. Generally not used by the model,
+                except for the OpenAI Responses API.
 
         Returns:
             The full model response.
@@ -71,6 +75,8 @@ def stream_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         """Stream a response from the model.
 
@@ -82,6 +88,8 @@ def stream_response(
             output_schema: The output schema to use.
             handoffs: The handoffs available to the model.
             tracing: Tracing configuration.
+            previous_response_id: The ID of the previous response. Generally not used by the model,
+                except for the OpenAI Responses API.
 
         Returns:
             An iterator of response stream events, in OpenAI Responses format.
diff --git a/src/agents/models/openai_chatcompletions.py b/src/agents/models/openai_chatcompletions.py
index b60d5d6a..c12fe68a 100644
--- a/src/agents/models/openai_chatcompletions.py
+++ b/src/agents/models/openai_chatcompletions.py
@@ -108,6 +108,7 @@ async def get_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         with generation_span(
             model=str(self.model),
@@ -168,6 +169,8 @@ async def stream_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         """
         Yields a partial message as it is generated, as well as the usage information.
diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py
index e509d6f8..ab4617d4 100644
--- a/src/agents/models/openai_responses.py
+++ b/src/agents/models/openai_responses.py
@@ -69,6 +69,7 @@ async def get_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         with response_span(disabled=tracing.is_disabled()) as span_response:
             try:
@@ -79,6 +80,7 @@ async def get_response(
                     tools,
                     output_schema,
                     handoffs,
+                    previous_response_id,
                     stream=False,
                 )
 
@@ -132,6 +134,7 @@ async def stream_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        previous_response_id: str | None,
     ) -> AsyncIterator[ResponseStreamEvent]:
         """
         Yields a partial message as it is generated, as well as the usage information.
@@ -145,6 +148,7 @@ async def stream_response(
                     tools,
                     output_schema,
                     handoffs,
+                    previous_response_id,
                     stream=True,
                 )
 
@@ -180,6 +184,7 @@ async def _fetch_response(
         tools: list[Tool],
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
+        previous_response_id: str | None,
         stream: Literal[True],
     ) -> AsyncStream[ResponseStreamEvent]: ...
@@ -192,6 +197,7 @@ async def _fetch_response(
         tools: list[Tool],
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
+        previous_response_id: str | None,
         stream: Literal[False],
     ) -> Response: ...
 
@@ -203,6 +209,7 @@ async def _fetch_response(
         tools: list[Tool],
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
+        previous_response_id: str | None,
         stream: Literal[True] | Literal[False] = False,
     ) -> Response | AsyncStream[ResponseStreamEvent]:
         list_input = ItemHelpers.input_to_new_input_list(input)
@@ -229,9 +236,11 @@ async def _fetch_response(
             f"Stream: {stream}\n"
             f"Tool choice: {tool_choice}\n"
             f"Response format: {response_format}\n"
+            f"Previous response id: {previous_response_id}\n"
         )
 
         return await self._client.responses.create(
+            previous_response_id=self._non_null_or_not_given(previous_response_id),
             instructions=self._non_null_or_not_given(system_instructions),
             model=self.model,
             input=list_input,
diff --git a/src/agents/result.py b/src/agents/result.py
index 40a64806..a2a6cc4a 100644
--- a/src/agents/result.py
+++ b/src/agents/result.py
@@ -80,6 +80,14 @@ def to_input_list(self) -> list[TResponseInputItem]:
 
         return original_items + new_items
 
+    @property
+    def last_response_id(self) -> str | None:
+        """Convenience property to get the response ID of the last model response."""
+        if not self.raw_responses:
+            return None
+
+        return self.raw_responses[-1].response_id
+
 
 @dataclass
 class RunResult(RunResultBase):
diff --git a/src/agents/run.py b/src/agents/run.py
index 93e6490c..e2b0dbce 100644
--- a/src/agents/run.py
+++ b/src/agents/run.py
@@ -117,6 +117,7 @@ async def run(
         max_turns: int = DEFAULT_MAX_TURNS,
         hooks: RunHooks[TContext] | None = None,
         run_config: RunConfig | None = None,
+        previous_response_id: str | None = None,
     ) -> RunResult:
         """Run a workflow starting at the given agent. The agent will run in a loop until a final
         output is generated. The loop runs like so:
@@ -141,6 +142,8 @@ async def run(
                 AI invocation (including any tool calls that might occur).
             hooks: An object that receives callbacks on various lifecycle events.
             run_config: Global settings for the entire agent run.
+            previous_response_id: The ID of the previous response. If using OpenAI models via the
+                Responses API, this allows you to skip passing input from the previous turn.
 
         Returns:
             A run result containing all the inputs, guardrail results and the output of the last
@@ -230,6 +233,7 @@ async def run(
                             run_config=run_config,
                             should_run_agent_start_hooks=should_run_agent_start_hooks,
                             tool_use_tracker=tool_use_tracker,
+                            previous_response_id=previous_response_id,
                         ),
                     )
                 else:
@@ -243,6 +247,7 @@ async def run(
                         run_config=run_config,
                         should_run_agent_start_hooks=should_run_agent_start_hooks,
                         tool_use_tracker=tool_use_tracker,
+                        previous_response_id=previous_response_id,
                     )
                 should_run_agent_start_hooks = False
 
@@ -291,6 +296,7 @@ def run_sync(
         max_turns: int = DEFAULT_MAX_TURNS,
         hooks: RunHooks[TContext] | None = None,
         run_config: RunConfig | None = None,
+        previous_response_id: str | None = None,
     ) -> RunResult:
         """Run a workflow synchronously, starting at the given agent. Note that this just wraps the
         `run` method, so it will not work if there's already an event loop (e.g. inside an async
@@ -319,6 +325,8 @@ def run_sync(
                 AI invocation (including any tool calls that might occur).
             hooks: An object that receives callbacks on various lifecycle events.
             run_config: Global settings for the entire agent run.
+            previous_response_id: The ID of the previous response. If using OpenAI models via the
+                Responses API, this allows you to skip passing input from the previous turn.
 
         Returns:
             A run result containing all the inputs, guardrail results and the output of the last
@@ -332,6 +340,7 @@ def run_sync(
                 max_turns=max_turns,
                 hooks=hooks,
                 run_config=run_config,
+                previous_response_id=previous_response_id,
             )
         )
 
@@ -344,6 +353,7 @@ def run_streamed(
         max_turns: int = DEFAULT_MAX_TURNS,
         hooks: RunHooks[TContext] | None = None,
         run_config: RunConfig | None = None,
+        previous_response_id: str | None = None,
     ) -> RunResultStreaming:
         """Run a workflow starting at the given agent in streaming mode. The returned result object
         contains a method you can use to stream semantic events as they are generated.
@@ -370,7 +380,8 @@ def run_streamed(
                 AI invocation (including any tool calls that might occur).
             hooks: An object that receives callbacks on various lifecycle events.
             run_config: Global settings for the entire agent run.
-
+            previous_response_id: The ID of the previous response. If using OpenAI models via the
+                Responses API, this allows you to skip passing input from the previous turn.
         Returns:
             A result object that contains data about the run, as well as a method to stream events.
         """
@@ -428,6 +439,7 @@ def run_streamed(
                 hooks=hooks,
                 context_wrapper=context_wrapper,
                 run_config=run_config,
+                previous_response_id=previous_response_id,
             )
         )
         return streamed_result
@@ -485,6 +497,7 @@ async def _run_streamed_impl(
         hooks: RunHooks[TContext],
         context_wrapper: RunContextWrapper[TContext],
         run_config: RunConfig,
+        previous_response_id: str | None,
     ):
         current_span: Span[AgentSpanData] | None = None
         current_agent = starting_agent
@@ -554,6 +567,7 @@ async def _run_streamed_impl(
                         should_run_agent_start_hooks,
                         tool_use_tracker,
                         all_tools,
+                        previous_response_id,
                     )
                     should_run_agent_start_hooks = False
 
@@ -623,6 +637,7 @@ async def _run_single_turn_streamed(
         should_run_agent_start_hooks: bool,
         tool_use_tracker: AgentToolUseTracker,
         all_tools: list[Tool],
+        previous_response_id: str | None,
     ) -> SingleStepResult:
         if should_run_agent_start_hooks:
             await asyncio.gather(
@@ -662,6 +677,7 @@ async def _run_single_turn_streamed(
             get_model_tracing_impl(
                 run_config.tracing_disabled, run_config.trace_include_sensitive_data
             ),
+            previous_response_id=previous_response_id,
         ):
             if isinstance(event, ResponseCompletedEvent):
                 usage = (
@@ -717,6 +733,7 @@ async def _run_single_turn(
         run_config: RunConfig,
         should_run_agent_start_hooks: bool,
         tool_use_tracker: AgentToolUseTracker,
+        previous_response_id: str | None,
     ) -> SingleStepResult:
         # Ensure we run the hooks before anything else
         if should_run_agent_start_hooks:
@@ -746,6 +763,7 @@ async def _run_single_turn(
             context_wrapper,
             run_config,
             tool_use_tracker,
+            previous_response_id,
         )
 
         return await cls._get_single_step_result_from_response(
@@ -888,6 +906,7 @@ async def _get_new_response(
         context_wrapper: RunContextWrapper[TContext],
         run_config: RunConfig,
         tool_use_tracker: AgentToolUseTracker,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         model = cls._get_model(agent, run_config)
         model_settings = agent.model_settings.resolve(run_config.model_settings)
@@ -903,6 +922,7 @@ async def _get_new_response(
             tracing=get_model_tracing_impl(
                 run_config.tracing_disabled, run_config.trace_include_sensitive_data
             ),
+            previous_response_id=previous_response_id,
         )
 
         context_wrapper.usage.add(new_response.usage)
diff --git a/tests/fake_model.py b/tests/fake_model.py
index 61fb5951..203479d0 100644
--- a/tests/fake_model.py
+++ b/tests/fake_model.py
@@ -54,6 +54,8 @@ async def get_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         self.last_turn_args = {
             "system_instructions": system_instructions,
@@ -93,6 +95,8 @@ async def stream_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         with generation_span(disabled=not self.tracing_enabled) as span:
             output = self.get_next_output()
diff --git a/tests/test_openai_chatcompletions.py b/tests/test_openai_chatcompletions.py
index 3608fc57..92d65fda 100644
--- a/tests/test_openai_chatcompletions.py
+++ b/tests/test_openai_chatcompletions.py
@@ -67,6 +67,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     )
     # Should have produced exactly one output message with one text part
     assert isinstance(resp, ModelResponse)
@@ -115,6 +116,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     )
     assert len(resp.output) == 1
     assert isinstance(resp.output[0], ResponseOutputMessage)
@@ -164,6 +166,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     )
     # Expect a message item followed by a function tool call item.
     assert len(resp.output) == 2
diff --git a/tests/test_openai_chatcompletions_stream.py b/tests/test_openai_chatcompletions_stream.py
index 7add92a6..b82f2430 100644
--- a/tests/test_openai_chatcompletions_stream.py
+++ b/tests/test_openai_chatcompletions_stream.py
@@ -79,6 +79,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     ):
         output_events.append(event)
     # We expect a response.created, then a response.output_item.added, content part added,
@@ -168,6 +169,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     ):
         output_events.append(event)
     # Expect sequence similar to text: created, output_item.added, content part added,
@@ -255,6 +257,7 @@ async def patched_fetch_response(self, *args, **kwargs):
         output_schema=None,
         handoffs=[],
         tracing=ModelTracing.DISABLED,
+        previous_response_id=None,
     ):
         output_events.append(event)
     # Sequence should be: response.created, then after loop we expect function call-related events:
diff --git a/tests/test_responses_tracing.py b/tests/test_responses_tracing.py
index 40bdfafb..0bc97a95 100644
--- a/tests/test_responses_tracing.py
+++ b/tests/test_responses_tracing.py
@@ -44,7 +44,14 @@ async def test_get_response_creates_trace(monkeypatch):
 
     # Mock _fetch_response to return a dummy response with a known id
     async def dummy_fetch_response(
-        system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+        system_instructions,
+        input,
+        model_settings,
+        tools,
+        output_schema,
+        handoffs,
+        prev_response_id,
+        stream,
     ):
         return DummyResponse()
 
@@ -52,7 +59,14 @@ async def dummy_fetch_response(
 
     # Call get_response
     await model.get_response(
-        "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED
+        "instr",
+        "input",
+        ModelSettings(),
+        [],
+        None,
+        [],
+        ModelTracing.ENABLED,
+        previous_response_id=None,
     )
 
     assert fetch_normalized_spans() == snapshot(
@@ -74,7 +88,14 @@ async def test_non_data_tracing_doesnt_set_response_id(monkeypatch):
 
     # Mock _fetch_response to return a dummy response with a known id
     async def dummy_fetch_response(
-        system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+        system_instructions,
+        input,
+        model_settings,
+        tools,
+        output_schema,
+        handoffs,
+        prev_response_id,
+        stream,
     ):
         return DummyResponse()
 
@@ -82,7 +103,14 @@ async def dummy_fetch_response(
 
     # Call get_response
    await model.get_response(
-        "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED_WITHOUT_DATA
+        "instr",
+        "input",
+        ModelSettings(),
+        [],
+        None,
+        [],
+        ModelTracing.ENABLED_WITHOUT_DATA,
+        previous_response_id=None,
     )
 
     assert fetch_normalized_spans() == snapshot(
@@ -102,7 +130,14 @@ async def test_disable_tracing_does_not_create_span(monkeypatch):
 
     # Mock _fetch_response to return a dummy response with a known id
     async def dummy_fetch_response(
-        system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+        system_instructions,
+        input,
+        model_settings,
+        tools,
+        output_schema,
+        handoffs,
+        prev_response_id,
+        stream,
     ):
         return DummyResponse()
 
@@ -110,7 +145,14 @@ async def dummy_fetch_response(
 
     # Call get_response
     await model.get_response(
-        "instr", "input", ModelSettings(), [], None, [], ModelTracing.DISABLED
+        "instr",
+        "input",
+        ModelSettings(),
+        [],
+        None,
+        [],
+        ModelTracing.DISABLED,
+        previous_response_id=None,
     )
 
     assert fetch_normalized_spans() == snapshot([{"workflow_name": "test"}])
@@ -127,7 +169,14 @@ async def test_stream_response_creates_trace(monkeypatch):
 
     # Define a dummy fetch function that returns an async stream with a dummy response
     async def dummy_fetch_response(
-        system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+        system_instructions,
+        input,
+        model_settings,
+        tools,
+        output_schema,
+        handoffs,
+        prev_response_id,
+        stream,
     ):
         class DummyStream:
             async def __aiter__(self):
@@ -142,7 +191,14 @@ async def __aiter__(self):
 
     # Consume the stream to trigger processing of the final response
     async for _ in model.stream_response(
-        "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED
+        "instr",
+        "input",
+        ModelSettings(),
+        [],
+        None,
+        [],
+        ModelTracing.ENABLED,
+        previous_response_id=None,
     ):
         pass
 
@@ -165,7 +221,14 @@ async def test_stream_non_data_tracing_doesnt_set_response_id(monkeypatch):
 
     # Define a dummy fetch function that returns an async stream with a dummy response
     async def dummy_fetch_response(
-        system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+        system_instructions,
+        input,
+        model_settings,
+        tools,
+        output_schema,
+        handoffs,
+        prev_response_id,
+        stream,
     ):
         class DummyStream:
             async def __aiter__(self):
@@ -180,7 +243,14 @@ async def __aiter__(self):
 
     # Consume the stream to trigger processing of the final response
     async for _ in model.stream_response(
-        "instr", "input", ModelSettings(), [], None, [], ModelTracing.ENABLED_WITHOUT_DATA
+        "instr",
+        "input",
+        ModelSettings(),
+        [],
+        None,
+        [],
+        ModelTracing.ENABLED_WITHOUT_DATA,
+        previous_response_id=None,
     ):
         pass
 
@@ -202,7 +272,14 @@ async def test_stream_disabled_tracing_doesnt_create_span(monkeypatch):
 
     # Define a dummy fetch function that returns an async stream with a dummy response
     async def dummy_fetch_response(
-        system_instructions, input, model_settings, tools, output_schema, handoffs, stream
+        system_instructions,
+        input,
+        model_settings,
+        tools,
+        output_schema,
+        handoffs,
+        prev_response_id,
+        stream,
     ):
         class DummyStream:
             async def __aiter__(self):
@@ -217,7 +294,14 @@ async def __aiter__(self):
 
     # Consume the stream to trigger processing of the final response
     async for _ in model.stream_response(
-        "instr", "input", ModelSettings(), [], None, [], ModelTracing.DISABLED
+        "instr",
+        "input",
+        ModelSettings(),
+        [],
+        None,
+        [],
+        ModelTracing.DISABLED,
+        previous_response_id=None,
    ):
         pass
 
diff --git a/tests/voice/test_workflow.py b/tests/voice/test_workflow.py
index 3f18c049..72a3370d 100644
--- a/tests/voice/test_workflow.py
+++ b/tests/voice/test_workflow.py
@@ -51,6 +51,8 @@ async def get_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> ModelResponse:
         raise NotImplementedError("Not implemented")
 
@@ -63,6 +65,8 @@ async def stream_response(
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
         tracing: ModelTracing,
+        *,
+        previous_response_id: str | None,
     ) -> AsyncIterator[TResponseStreamEvent]:
         output = self.get_next_output()
         for item in output:
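
Usage sketch: a minimal example of how the two additions from PATCH 2/2 — the
`previous_response_id` argument on `Runner.run` and the `last_response_id`
property on the run result — are meant to compose. It assumes an OpenAI
Responses API model; the agent name, instructions, and prompts are made up for
illustration.

    import asyncio

    from agents import Agent, Runner


    async def main() -> None:
        agent = Agent(name="Assistant", instructions="Reply very concisely.")

        # Turn 1: a normal run. last_response_id surfaces
        # ModelResponse.response_id from the final raw response; it is None
        # for providers that don't support it (e.g. Chat Completions).
        first = await Runner.run(agent, "What is the Golden Gate Bridge?")
        print(first.final_output)

        # Turn 2: instead of replaying first.to_input_list() plus the new
        # message, pass previous_response_id and send only the new input;
        # the Responses API resumes the stored conversation server-side.
        second = await Runner.run(
            agent,
            "How long is it?",
            previous_response_id=first.last_response_id,
        )
        print(second.final_output)


    if __name__ == "__main__":
        asyncio.run(main())

For non-Responses backends, callers should keep the existing
`to_input_list()` pattern, since `last_response_id` will be None there.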