Skip to content

Commit bd5b4ab

Browse files
feat(api): add service tier argument for chat completions (#900)
1 parent 70d2bb3 commit bd5b4ab

File tree

3 files changed

+27
-1
lines changed

3 files changed

+27
-1
lines changed

.stats.yml

+1 -1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1 1
configured_endpoints: 64
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-5cb1810135c35c5024698f3365626471a04796e26e393aefe1aa0ba3c0891919.yml
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-8fe357c6b5a425d810d731e4102a052d8e38c5e2d66950e6de1025415160bf88.yml

src/resources/chat/completions.ts

+25
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@ export interface ChatCompletion {
66 66
*/
67 67
object: 'chat.completion';
68 68

69+
/**
70+
* The service tier used for processing the request. This field is only included if
71+
* the `service_tier` parameter is specified in the request.
72+
*/
73+
service_tier?: 'scale' | 'default' | null;
74+
69 75
/**
70 76
* This fingerprint represents the backend configuration that the model runs with.
71 77
*
@@ -205,6 +211,12 @@ export interface ChatCompletionChunk {
205 211
*/
206 212
object: 'chat.completion.chunk';
207 213

214+
/**
215+
* The service tier used for processing the request. This field is only included if
216+
* the `service_tier` parameter is specified in the request.
217+
*/
218+
service_tier?: 'scale' | 'default' | null;
219+
208 220
/**
209 221
* This fingerprint represents the backend configuration that the model runs with.
210 222
* Can be used in conjunction with the `seed` request parameter to understand when
@@ -800,6 +812,19 @@ export interface ChatCompletionCreateParamsBase {
800 812
*/
801 813
seed?: number | null;
802 814

815+
/**
816+
* Specifies the latency tier to use for processing the request. This parameter is
817+
* relevant for customers subscribed to the scale tier service:
818+
*
819+
* - If set to 'auto', the system will utilize scale tier credits until they are
820+
* exhausted.
821+
* - If set to 'default', the request will be processed in the shared cluster.
822+
*
823+
* When this parameter is set, the response body will include the `service_tier`
824+
* utilized.
825+
*/
826+
service_tier?: 'auto' | 'default' | null;
827+
803 828
/**
804 829
* Up to 4 sequences where the API will stop generating further tokens.
805 830
*/

tests/api-resources/chat/completions.test.ts

+1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ describe('resource completions', () => {
38 38
presence_penalty: -2,
39 39
response_format: { type: 'json_object' },
40 40
seed: -9223372036854776000,
41+
service_tier: 'auto',
41 42
stop: 'string',
42 43
stream: false,
43 44
stream_options: { include_usage: true },

0 commit comments

Comments
 (0)