Skip to content

Commit 6792170

Browse files
RobertCraigie stainless-app[bot]
authored and committed
fix(audio): correct types for transcriptions / translations (#1104)
1 parent 9e6c55a commit 6792170

File tree

6 files changed

+211
-11
lines changed

6 files changed

+211
-11
lines changed

.stats.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
configured_endpoints: 68
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-17ddd746c775ca4d4fbe64e5621ac30756ef09c061ff6313190b6ec162222d4c.yml
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-71e58a77027c67e003fdd1b1ac8ac11557d8bfabc7666d1a827c6b1ca8ab98b5.yml

api.md

+8-2
Original file line numberDiff line numberDiff line change
@@ -115,20 +115,26 @@ Types:
115115
Types:
116116

117117
- <code><a href="./src/resources/audio/transcriptions.ts">Transcription</a></code>
118+
- <code><a href="./src/resources/audio/transcriptions.ts">TranscriptionSegment</a></code>
119+
- <code><a href="./src/resources/audio/transcriptions.ts">TranscriptionVerbose</a></code>
120+
- <code><a href="./src/resources/audio/transcriptions.ts">TranscriptionWord</a></code>
121+
- <code><a href="./src/resources/audio/transcriptions.ts">TranscriptionCreateResponse</a></code>
118122

119123
Methods:
120124

121-
- <code title="post /audio/transcriptions">client.audio.transcriptions.<a href="./src/resources/audio/transcriptions.ts">create</a>({ ...params }) -> Transcription</code>
125+
- <code title="post /audio/transcriptions">client.audio.transcriptions.<a href="./src/resources/audio/transcriptions.ts">create</a>({ ...params }) -> TranscriptionCreateResponse</code>
122126

123127
## Translations
124128

125129
Types:
126130

127131
- <code><a href="./src/resources/audio/translations.ts">Translation</a></code>
132+
- <code><a href="./src/resources/audio/translations.ts">TranslationVerbose</a></code>
133+
- <code><a href="./src/resources/audio/translations.ts">TranslationCreateResponse</a></code>
128134

129135
Methods:
130136

131-
- <code title="post /audio/translations">client.audio.translations.<a href="./src/resources/audio/translations.ts">create</a>({ ...params }) -> Translation</code>
137+
- <code title="post /audio/translations">client.audio.translations.<a href="./src/resources/audio/translations.ts">create</a>({ ...params }) -> TranslationCreateResponse</code>
132138

133139
## Speech
134140

src/resources/audio/audio.ts

+6
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,15 @@ export namespace Audio {
2525
export import AudioResponseFormat = AudioAPI.AudioResponseFormat;
2626
export import Transcriptions = TranscriptionsAPI.Transcriptions;
2727
export import Transcription = TranscriptionsAPI.Transcription;
28+
export import TranscriptionSegment = TranscriptionsAPI.TranscriptionSegment;
29+
export import TranscriptionVerbose = TranscriptionsAPI.TranscriptionVerbose;
30+
export import TranscriptionWord = TranscriptionsAPI.TranscriptionWord;
31+
export import TranscriptionCreateResponse = TranscriptionsAPI.TranscriptionCreateResponse;
2832
export import TranscriptionCreateParams = TranscriptionsAPI.TranscriptionCreateParams;
2933
export import Translations = TranslationsAPI.Translations;
3034
export import Translation = TranslationsAPI.Translation;
35+
export import TranslationVerbose = TranslationsAPI.TranslationVerbose;
36+
export import TranslationCreateResponse = TranslationsAPI.TranslationCreateResponse;
3137
export import TranslationCreateParams = TranslationsAPI.TranslationCreateParams;
3238
export import Speech = SpeechAPI.Speech;
3339
export import SpeechModel = SpeechAPI.SpeechModel;

src/resources/audio/index.ts

+16-2
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,19 @@
22

33
export { AudioModel, AudioResponseFormat, Audio } from './audio';
44
export { SpeechModel, SpeechCreateParams, Speech } from './speech';
5-
export { Transcription, TranscriptionCreateParams, Transcriptions } from './transcriptions';
6-
export { Translation, TranslationCreateParams, Translations } from './translations';
5+
export {
6+
Transcription,
7+
TranscriptionSegment,
8+
TranscriptionVerbose,
9+
TranscriptionWord,
10+
TranscriptionCreateResponse,
11+
TranscriptionCreateParams,
12+
Transcriptions,
13+
} from './transcriptions';
14+
export {
15+
Translation,
16+
TranslationVerbose,
17+
TranslationCreateResponse,
18+
TranslationCreateParams,
19+
Translations,
20+
} from './translations';

src/resources/audio/transcriptions.ts

+133-3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,22 @@ export class Transcriptions extends APIResource {
99
/**
1010
* Transcribes audio into the input language.
1111
*/
12-
create(body: TranscriptionCreateParams, options?: Core.RequestOptions): Core.APIPromise<Transcription> {
12+
create(
13+
body: TranscriptionCreateParams<'json' | undefined>,
14+
options?: Core.RequestOptions,
15+
): Core.APIPromise<Transcription>;
16+
create(
17+
body: TranscriptionCreateParams<'verbose_json'>,
18+
options?: Core.RequestOptions,
19+
): Core.APIPromise<TranscriptionVerbose>;
20+
create(
21+
body: TranscriptionCreateParams<'srt' | 'vtt' | 'text'>,
22+
options?: Core.RequestOptions,
23+
): Core.APIPromise<string>;
24+
create(
25+
body: TranscriptionCreateParams,
26+
options?: Core.RequestOptions,
27+
): Core.APIPromise<TranscriptionCreateResponse | string> {
1328
return this._client.post('/audio/transcriptions', Core.multipartFormRequestOptions({ body, ...options }));
1429
}
1530
}
@@ -25,7 +40,118 @@ export interface Transcription {
2540
text: string;
2641
}
2742

28-
export interface TranscriptionCreateParams {
43+
export interface TranscriptionSegment {
44+
/**
45+
* Unique identifier of the segment.
46+
*/
47+
id: number;
48+
49+
/**
50+
* Average logprob of the segment. If the value is lower than -1, consider the
51+
* logprobs failed.
52+
*/
53+
avg_logprob: number;
54+
55+
/**
56+
* Compression ratio of the segment. If the value is greater than 2.4, consider the
57+
* compression failed.
58+
*/
59+
compression_ratio: number;
60+
61+
/**
62+
* End time of the segment in seconds.
63+
*/
64+
end: number;
65+
66+
/**
67+
* Probability of no speech in the segment. If the value is higher than 1.0 and the
68+
* `avg_logprob` is below -1, consider this segment silent.
69+
*/
70+
no_speech_prob: number;
71+
72+
/**
73+
* Seek offset of the segment.
74+
*/
75+
seek: number;
76+
77+
/**
78+
* Start time of the segment in seconds.
79+
*/
80+
start: number;
81+
82+
/**
83+
* Temperature parameter used for generating the segment.
84+
*/
85+
temperature: number;
86+
87+
/**
88+
* Text content of the segment.
89+
*/
90+
text: string;
91+
92+
/**
93+
* Array of token IDs for the text content.
94+
*/
95+
tokens: Array<number>;
96+
}
97+
98+
/**
99+
* Represents a verbose JSON transcription response returned by the model, based on the
100+
* provided input.
101+
*/
102+
export interface TranscriptionVerbose {
103+
/**
104+
* The duration of the input audio.
105+
*/
106+
duration: string;
107+
108+
/**
109+
* The language of the input audio.
110+
*/
111+
language: string;
112+
113+
/**
114+
* The transcribed text.
115+
*/
116+
text: string;
117+
118+
/**
119+
* Segments of the transcribed text and their corresponding details.
120+
*/
121+
segments?: Array<TranscriptionSegment>;
122+
123+
/**
124+
* Extracted words and their corresponding timestamps.
125+
*/
126+
words?: Array<TranscriptionWord>;
127+
}
128+
129+
export interface TranscriptionWord {
130+
/**
131+
* End time of the word in seconds.
132+
*/
133+
end: number;
134+
135+
/**
136+
* Start time of the word in seconds.
137+
*/
138+
start: number;
139+
140+
/**
141+
* The text content of the word.
142+
*/
143+
word: string;
144+
}
145+
146+
/**
147+
* Represents a transcription response returned by the model, based on the provided
148+
* input.
149+
*/
150+
export type TranscriptionCreateResponse = Transcription | TranscriptionVerbose;
151+
152+
export interface TranscriptionCreateParams<
153+
ResponseFormat extends AudioAPI.AudioResponseFormat | undefined = AudioAPI.AudioResponseFormat | undefined,
154+
> {
29155
/**
30156
* The audio file object (not file name) to transcribe, in one of these formats:
31157
* flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
@@ -57,7 +183,7 @@ export interface TranscriptionCreateParams {
57183
* The format of the output, in one of these options: `json`, `text`, `srt`,
58184
* `verbose_json`, or `vtt`.
59185
*/
60-
response_format?: AudioAPI.AudioResponseFormat;
186+
response_format?: ResponseFormat;
61187

62188
/**
63189
* The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
@@ -80,5 +206,9 @@ export interface TranscriptionCreateParams {
80206

81207
export namespace Transcriptions {
82208
export import Transcription = TranscriptionsAPI.Transcription;
209+
export import TranscriptionSegment = TranscriptionsAPI.TranscriptionSegment;
210+
export import TranscriptionVerbose = TranscriptionsAPI.TranscriptionVerbose;
211+
export import TranscriptionWord = TranscriptionsAPI.TranscriptionWord;
212+
export import TranscriptionCreateResponse = TranscriptionsAPI.TranscriptionCreateResponse;
83213
export import TranscriptionCreateParams = TranscriptionsAPI.TranscriptionCreateParams;
84214
}

src/resources/audio/translations.ts

+47-3
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,28 @@ import { APIResource } from '../../resource';
44
import * as Core from '../../core';
55
import * as TranslationsAPI from './translations';
66
import * as AudioAPI from './audio';
7+
import * as TranscriptionsAPI from './transcriptions';
78

89
export class Translations extends APIResource {
910
/**
1011
* Translates audio into English.
1112
*/
12-
create(body: TranslationCreateParams, options?: Core.RequestOptions): Core.APIPromise<Translation> {
13+
create(
14+
body: TranslationCreateParams<'json' | undefined>,
15+
options?: Core.RequestOptions,
16+
): Core.APIPromise<Translation>;
17+
create(
18+
body: TranslationCreateParams<'verbose_json'>,
19+
options?: Core.RequestOptions,
20+
): Core.APIPromise<TranslationVerbose>;
21+
create(
22+
body: TranslationCreateParams<'text' | 'srt' | 'vtt'>,
23+
options?: Core.RequestOptions,
24+
): Core.APIPromise<string>;
25+
create(
26+
body: TranslationCreateParams,
27+
options?: Core.RequestOptions,
28+
): Core.APIPromise<TranslationCreateResponse | string> {
1329
return this._client.post('/audio/translations', Core.multipartFormRequestOptions({ body, ...options }));
1430
}
1531
}
@@ -18,7 +34,33 @@ export interface Translation {
1834
text: string;
1935
}
2036

21-
export interface TranslationCreateParams {
37+
export interface TranslationVerbose {
38+
/**
39+
* The duration of the input audio.
40+
*/
41+
duration: string;
42+
43+
/**
44+
* The language of the output translation (always `english`).
45+
*/
46+
language: string;
47+
48+
/**
49+
* The translated text.
50+
*/
51+
text: string;
52+
53+
/**
54+
* Segments of the translated text and their corresponding details.
55+
*/
56+
segments?: Array<TranscriptionsAPI.TranscriptionSegment>;
57+
}
58+
59+
export type TranslationCreateResponse = Translation | TranslationVerbose;
60+
61+
export interface TranslationCreateParams<
62+
ResponseFormat extends AudioAPI.AudioResponseFormat | undefined = AudioAPI.AudioResponseFormat | undefined,
63+
> {
2264
/**
2365
* The audio file object (not file name) to translate, in one of these formats: flac,
2466
* mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
@@ -43,7 +85,7 @@ export interface TranslationCreateParams {
4385
* The format of the output, in one of these options: `json`, `text`, `srt`,
4486
* `verbose_json`, or `vtt`.
4587
*/
46-
response_format?: AudioAPI.AudioResponseFormat;
88+
response_format?: ResponseFormat;
4789

4890
/**
4991
* The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
@@ -57,5 +99,7 @@ export interface TranslationCreateParams {
5799

58100
export namespace Translations {
59101
export import Translation = TranslationsAPI.Translation;
102+
export import TranslationVerbose = TranslationsAPI.TranslationVerbose;
103+
export import TranslationCreateResponse = TranslationsAPI.TranslationCreateResponse;
60104
export import TranslationCreateParams = TranslationsAPI.TranslationCreateParams;
61105
}

0 commit comments

Comments
 (0)