diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index ba231b0760..9e1cdeda32 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "1.21.0"
+  ".": "1.21.1"
 }
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4a3607f6a2..20e8976936 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## 1.21.1 (2024-04-17)
+
+Full Changelog: [v1.21.0...v1.21.1](https://github.com/openai/openai-python/compare/v1.21.0...v1.21.1)
+
+### Chores
+
+* **api:** docs and response_format response property ([#1327](https://github.com/openai/openai-python/issues/1327)) ([7a6d142](https://github.com/openai/openai-python/commit/7a6d142f013994c4eb9a4f55888464c885f8baf0))
+
 ## 1.21.0 (2024-04-17)
 
 Full Changelog: [v1.20.0...v1.21.0](https://github.com/openai/openai-python/compare/v1.20.0...v1.21.0)
diff --git a/pyproject.toml b/pyproject.toml
index 978e82ae86..0ab25048b1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openai"
-version = "1.21.0"
+version = "1.21.1"
 description = "The official Python library for the openai API"
 dynamic = ["readme"]
 license = "Apache-2.0"
diff --git a/src/openai/_version.py b/src/openai/_version.py
index 6f0fc92f2e..4bb6604548 100644
--- a/src/openai/_version.py
+++ b/src/openai/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "openai"
-__version__ = "1.21.0"  # x-release-please-version
+__version__ = "1.21.1"  # x-release-please-version
diff --git a/src/openai/resources/beta/assistants.py b/src/openai/resources/beta/assistants.py
index 8695a949ca..c0338164e2 100644
--- a/src/openai/resources/beta/assistants.py
+++ b/src/openai/resources/beta/assistants.py
@@ -109,7 +109,7 @@ def create(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -253,7 +253,7 @@ def update(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -494,7 +494,7 @@ async def create(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -638,7 +638,7 @@ async def update(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py
index 7aab17a30d..e2488316b5 100644
--- a/src/openai/resources/beta/threads/runs/runs.py
+++ b/src/openai/resources/beta/threads/runs/runs.py
@@ -145,13 +145,13 @@ def create(
           max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
               run. The run will make a best effort to use only the number of completion tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              completion tokens specified, the run will end with status `complete`. See
+              completion tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
               The run will make a best effort to use only the number of prompt tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              prompt tokens specified, the run will end with status `complete`. See
+              prompt tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -166,7 +166,7 @@ def create(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -190,7 +190,7 @@ def create(
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -201,6 +201,11 @@ def create(
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -287,13 +292,13 @@ def create(
           max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
               run. The run will make a best effort to use only the number of completion tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              completion tokens specified, the run will end with status `complete`. See
+              completion tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
               The run will make a best effort to use only the number of prompt tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              prompt tokens specified, the run will end with status `complete`. See
+              prompt tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -308,7 +313,7 @@ def create(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -328,7 +333,7 @@ def create(
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -339,6 +344,11 @@ def create(
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -425,13 +435,13 @@ def create(
           max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
               run. The run will make a best effort to use only the number of completion tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              completion tokens specified, the run will end with status `complete`. See
+              completion tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
               The run will make a best effort to use only the number of prompt tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              prompt tokens specified, the run will end with status `complete`. See
+              prompt tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -446,7 +456,7 @@ def create(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -466,7 +476,7 @@ def create(
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -477,6 +487,11 @@ def create(
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -1659,13 +1674,13 @@ async def create(
           max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
               run. The run will make a best effort to use only the number of completion tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              completion tokens specified, the run will end with status `complete`. See
+              completion tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
               The run will make a best effort to use only the number of prompt tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              prompt tokens specified, the run will end with status `complete`. See
+              prompt tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -1680,7 +1695,7 @@ async def create(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -1704,7 +1719,7 @@ async def create(
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -1715,6 +1730,11 @@ async def create(
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -1801,13 +1821,13 @@ async def create(
           max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
               run. The run will make a best effort to use only the number of completion tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              completion tokens specified, the run will end with status `complete`. See
+              completion tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
               The run will make a best effort to use only the number of prompt tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              prompt tokens specified, the run will end with status `complete`. See
+              prompt tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -1822,7 +1842,7 @@ async def create(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -1842,7 +1862,7 @@ async def create(
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -1853,6 +1873,11 @@ async def create(
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -1939,13 +1964,13 @@ async def create(
           max_completion_tokens: The maximum number of completion tokens that may be used over the course of the
               run. The run will make a best effort to use only the number of completion tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              completion tokens specified, the run will end with status `complete`. See
+              completion tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run.
               The run will make a best effort to use only the number of prompt tokens
               specified, across multiple turns of the run. If the run exceeds the number of
-              prompt tokens specified, the run will end with status `complete`. See
+              prompt tokens specified, the run will end with status `incomplete`. See
               `incomplete_details` for more info.
 
           metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
@@ -1960,7 +1985,7 @@ async def create(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -1980,7 +2005,7 @@ async def create(
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -1991,6 +2016,11 @@ async def create(
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py
index 678c621a10..6e54faf469 100644
--- a/src/openai/resources/beta/threads/threads.py
+++ b/src/openai/resources/beta/threads/threads.py
@@ -340,7 +340,7 @@ def create_and_run(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -366,7 +366,7 @@ def create_and_run(
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -382,6 +382,11 @@ def create_and_run(
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -481,7 +486,7 @@ def create_and_run(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -503,7 +508,7 @@ def create_and_run(
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -519,6 +524,11 @@ def create_and_run(
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -618,7 +628,7 @@ def create_and_run(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -640,7 +650,7 @@ def create_and_run(
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -656,6 +666,11 @@ def create_and_run(
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -1296,7 +1311,7 @@ async def create_and_run(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -1322,7 +1337,7 @@ async def create_and_run(
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -1338,6 +1353,11 @@ async def create_and_run(
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -1437,7 +1457,7 @@ async def create_and_run(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -1459,7 +1479,7 @@ async def create_and_run(
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -1475,6 +1495,11 @@ async def create_and_run(
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
@@ -1574,7 +1599,7 @@ async def create_and_run(
 
           response_format: Specifies the format that the model must output. Compatible with
               [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+              all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
               Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
               message the model generates is valid JSON.
@@ -1596,7 +1621,7 @@ async def create_and_run(
           tool_choice: Controls which (if any) tool is called by the model. `none` means the model will
               not call any tools and instead generates a message. `auto` is the default value
               and means the model can pick between generating a message or calling a tool.
-              Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+              Specifying a particular tool like `{"type": "file_search"}` or
               `{"type": "function", "function": {"name": "my_function"}}` forces the model to
               call that tool.
 
@@ -1612,6 +1637,11 @@ async def create_and_run(
               model considers the results of the tokens with top_p probability mass. So 0.1
               means only the tokens comprising the top 10% probability mass are considered.
 
+              We generally recommend altering this or temperature but not both.
+
+          truncation_strategy: Controls how a thread will be truncated prior to the run. Use this to
+              control the initial context window of the run.
+
           extra_headers: Send extra headers
 
           extra_query: Add additional query parameters to the request
diff --git a/src/openai/types/beta/assistant.py b/src/openai/types/beta/assistant.py
index fa09efb0cc..0b997e0b0e 100644
--- a/src/openai/types/beta/assistant.py
+++ b/src/openai/types/beta/assistant.py
@@ -5,6 +5,7 @@
 
 from ..._models import BaseModel
 from .assistant_tool import AssistantTool
+from .assistant_response_format_option import AssistantResponseFormatOption
 
 __all__ = ["Assistant", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"]
 
@@ -81,6 +82,32 @@ class Assistant(BaseModel):
     `code_interpreter`, `file_search`, or `function`.
     """
 
+    response_format: Optional[AssistantResponseFormatOption] = None
+    """Specifies the format that the model must output.
+
+    Compatible with
+    [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
+    all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+
+    Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
+    message the model generates is valid JSON.
+
+    **Important:** when using JSON mode, you **must** also instruct the model to
+    produce JSON yourself via a system or user message. Without this, the model may
+    generate an unending stream of whitespace until the generation reaches the token
+    limit, resulting in a long-running and seemingly "stuck" request. Also note that
+    the message content may be partially cut off if `finish_reason="length"`, which
+    indicates the generation exceeded `max_tokens` or the conversation exceeded the
+    max context length.
+    """
+
+    temperature: Optional[float] = None
+    """What sampling temperature to use, between 0 and 2.
+
+    Higher values like 0.8 will make the output more random, while lower values like
+    0.2 will make it more focused and deterministic.
+    """
+
     tool_resources: Optional[ToolResources] = None
     """A set of resources that are used by the assistant's tools.
 
@@ -88,3 +115,12 @@ class Assistant(BaseModel):
     `code_interpreter` tool requires a list of file IDs, while the `file_search`
     tool requires a list of vector store IDs.
     """
+
+    top_p: Optional[float] = None
+    """
+    An alternative to sampling with temperature, called nucleus sampling, where the
+    model considers the results of the tokens with top_p probability mass. So 0.1
+    means only the tokens comprising the top 10% probability mass are considered.
+
+    We generally recommend altering this or temperature but not both.
+    """
diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py
index 925b85050f..e9ff66dfc3 100644
--- a/src/openai/types/beta/assistant_create_params.py
+++ b/src/openai/types/beta/assistant_create_params.py
@@ -77,7 +77,7 @@ class AssistantCreateParams(TypedDict, total=False):
 
     Compatible with
     [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-    all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+    all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
     message the model generates is valid JSON.
diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py
index 1354b078a8..55c846ce4e 100644
--- a/src/openai/types/beta/assistant_update_params.py
+++ b/src/openai/types/beta/assistant_update_params.py
@@ -47,7 +47,7 @@ class AssistantUpdateParams(TypedDict, total=False):
 
     Compatible with
     [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-    all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+    all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
     message the model generates is valid JSON.
diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py
index d7d5a758e8..0c102db705 100644
--- a/src/openai/types/beta/thread_create_and_run_params.py
+++ b/src/openai/types/beta/thread_create_and_run_params.py
@@ -106,7 +106,7 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
 
     Compatible with
     [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-    all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+    all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
     message the model generates is valid JSON.
@@ -135,7 +135,7 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
     Controls which (if any) tool is called by the model. `none` means the model will
     not call any tools and instead generates a message. `auto` is the default value
     and means the model can pick between generating a message or calling a tool.
-    Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+    Specifying a particular tool like `{"type": "file_search"}` or
     `{"type": "function", "function": {"name": "my_function"}}` forces the model to
     call that tool.
     """
@@ -159,9 +159,15 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
     An alternative to sampling with temperature, called nucleus sampling, where the
     model considers the results of the tokens with top_p probability mass. So 0.1
     means only the tokens comprising the top 10% probability mass are considered.
+
+    We generally recommend altering this or temperature but not both.
     """
 
     truncation_strategy: Optional[TruncationStrategy]
+    """Controls how a thread will be truncated prior to the run.
+
+    Use this to control the initial context window of the run.
+    """
 
 
 class ThreadMessageAttachment(TypedDict, total=False):
diff --git a/src/openai/types/beta/threads/run.py b/src/openai/types/beta/threads/run.py
index 8f427ce6e8..4fd5103348 100644
--- a/src/openai/types/beta/threads/run.py
+++ b/src/openai/types/beta/threads/run.py
@@ -162,7 +162,7 @@ class Run(BaseModel):
 
     Compatible with
     [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-    all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+    all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
     message the model generates is valid JSON.
@@ -197,7 +197,7 @@ class Run(BaseModel):
     Controls which (if any) tool is called by the model. `none` means the model will
     not call any tools and instead generates a message. `auto` is the default value
     and means the model can pick between generating a message or calling a tool.
-    Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+    Specifying a particular tool like `{"type": "file_search"}` or
     `{"type": "function", "function": {"name": "my_function"}}` forces the model to
     call that tool.
     """
@@ -210,6 +210,10 @@ class Run(BaseModel):
     """
 
     truncation_strategy: Optional[TruncationStrategy] = None
+    """Controls how a thread will be truncated prior to the run.
+
+    Use this to control the initial context window of the run.
+    """
 
     usage: Optional[Usage] = None
     """Usage statistics related to the run.
diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py
index fd0b4e7920..c1bb8ba62a 100644
--- a/src/openai/types/beta/threads/run_create_params.py
+++ b/src/openai/types/beta/threads/run_create_params.py
@@ -49,7 +49,7 @@ class RunCreateParamsBase(TypedDict, total=False):
     The maximum number of completion tokens that may be used over the course of the
     run. The run will make a best effort to use only the number of completion tokens
     specified, across multiple turns of the run. If the run exceeds the number of
-    completion tokens specified, the run will end with status `complete`. See
+    completion tokens specified, the run will end with status `incomplete`. See
     `incomplete_details` for more info.
     """
 
@@ -58,7 +58,7 @@ class RunCreateParamsBase(TypedDict, total=False):
 
     The run will make a best effort to use only the number of prompt tokens
     specified, across multiple turns of the run. If the run exceeds the number of
-    prompt tokens specified, the run will end with status `complete`. See
+    prompt tokens specified, the run will end with status `incomplete`. See
     `incomplete_details` for more info.
     """
 
@@ -106,7 +106,7 @@ class RunCreateParamsBase(TypedDict, total=False):
 
     Compatible with
     [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-    all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
+    all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
 
     Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
     message the model generates is valid JSON.
@@ -132,7 +132,7 @@ class RunCreateParamsBase(TypedDict, total=False):
     Controls which (if any) tool is called by the model. `none` means the model will
     not call any tools and instead generates a message. `auto` is the default value
     and means the model can pick between generating a message or calling a tool.
-    Specifying a particular tool like `{"type": "TOOL_TYPE"}` or
+    Specifying a particular tool like `{"type": "file_search"}` or
     `{"type": "function", "function": {"name": "my_function"}}` forces the model to
     call that tool.
     """
@@ -148,9 +148,15 @@ class RunCreateParamsBase(TypedDict, total=False):
     An alternative to sampling with temperature, called nucleus sampling, where the
     model considers the results of the tokens with top_p probability mass. So 0.1
     means only the tokens comprising the top 10% probability mass are considered.
+
+    We generally recommend altering this or temperature but not both.
     """
 
     truncation_strategy: Optional[TruncationStrategy]
+    """Controls how a thread will be truncated prior to the run.
+
+    Use this to control the initial context window of the run.
+    """
 
 
 class AdditionalMessageAttachment(TypedDict, total=False):