Skip to content

Commit 458c075

Browse files
feat(api): new models for TTS, STT, + new audio features for Realtime (#298)
1 parent 1356ffd commit 458c075

10 files changed

+375
-14
lines changed

Diff for: .stats.yml

+1 -1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
configured_endpoints: 76
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-b26121d5df6eb5d3032a45a267473798b15fcfec76dd44a3256cf1238be05fa4.yml
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-c22f59c66aec7914b6ee653d3098d1c1c8c16c180d2a158e819c8ddbf476f74b.yml

Diff for: api.md

+7
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,16 @@ Params Types:
145145

146146
## Transcriptions
147147

148+
Params Types:
149+
150+
- <a href="https://pkg.go.dev/github.com/openai/openai-go">openai</a>.<a href="https://pkg.go.dev/github.com/openai/openai-go#TranscriptionInclude">TranscriptionInclude</a>
151+
148152
Response Types:
149153

150154
- <a href="https://pkg.go.dev/github.com/openai/openai-go">openai</a>.<a href="https://pkg.go.dev/github.com/openai/openai-go#Transcription">Transcription</a>
155+
- <a href="https://pkg.go.dev/github.com/openai/openai-go">openai</a>.<a href="https://pkg.go.dev/github.com/openai/openai-go#TranscriptionStreamEvent">TranscriptionStreamEvent</a>
156+
- <a href="https://pkg.go.dev/github.com/openai/openai-go">openai</a>.<a href="https://pkg.go.dev/github.com/openai/openai-go#TranscriptionTextDeltaEvent">TranscriptionTextDeltaEvent</a>
157+
- <a href="https://pkg.go.dev/github.com/openai/openai-go">openai</a>.<a href="https://pkg.go.dev/github.com/openai/openai-go#TranscriptionTextDoneEvent">TranscriptionTextDoneEvent</a>
151158

152159
Methods:
153160

Diff for: audio.go

+5 -2
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,14 @@ func NewAudioService(opts ...option.RequestOption) (r *AudioService) {
3434
type AudioModel = string
3535

3636
const (
37-
AudioModelWhisper1 AudioModel = "whisper-1"
37+
AudioModelWhisper1 AudioModel = "whisper-1"
38+
AudioModelGPT4oTranscribe AudioModel = "gpt-4o-transcribe"
39+
AudioModelGPT4oMiniTranscribe AudioModel = "gpt-4o-mini-transcribe"
3840
)
3941

4042
// The format of the output, in one of these options: `json`, `text`, `srt`,
41-
// `verbose_json`, or `vtt`.
43+
// `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
44+
// the only supported format is `json`.
4245
type AudioResponseFormat string
4346

4447
const (

Diff for: audiospeech.go

+7 -3
Original file line numberDiff line numberDiff line change
@@ -43,21 +43,25 @@ func (r *AudioSpeechService) New(ctx context.Context, body AudioSpeechNewParams,
4343
type SpeechModel = string
4444

4545
const (
46-
SpeechModelTTS1 SpeechModel = "tts-1"
47-
SpeechModelTTS1HD SpeechModel = "tts-1-hd"
46+
SpeechModelTTS1 SpeechModel = "tts-1"
47+
SpeechModelTTS1HD SpeechModel = "tts-1-hd"
48+
SpeechModelGPT4oMiniTTS SpeechModel = "gpt-4o-mini-tts"
4849
)
4950

5051
type AudioSpeechNewParams struct {
5152
// The text to generate audio for. The maximum length is 4096 characters.
5253
Input param.Field[string] `json:"input,required"`
5354
// One of the available [TTS models](https://platform.openai.com/docs/models#tts):
54-
// `tts-1` or `tts-1-hd`
55+
// `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`.
5556
Model param.Field[SpeechModel] `json:"model,required"`
5657
// The voice to use when generating the audio. Supported voices are `alloy`, `ash`,
5758
// `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the
5859
// voices are available in the
5960
// [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
6061
Voice param.Field[AudioSpeechNewParamsVoice] `json:"voice,required"`
62+
// Control the voice of your generated audio with additional instructions. Does not
63+
// work with `tts-1` or `tts-1-hd`.
64+
Instructions param.Field[string] `json:"instructions"`
6165
// The format to return audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`,
6266
// `wav`, and `pcm`.
6367
ResponseFormat param.Field[AudioSpeechNewParamsResponseFormat] `json:"response_format"`

Diff for: audiospeech_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ func TestAudioSpeechNewWithOptionalParams(t *testing.T) {
3030
Input: openai.F("input"),
3131
Model: openai.F(openai.SpeechModelTTS1),
3232
Voice: openai.F(openai.AudioSpeechNewParamsVoiceAlloy),
33+
Instructions: openai.F("instructions"),
3334
ResponseFormat: openai.F(openai.AudioSpeechNewParamsResponseFormatMP3),
3435
Speed: openai.F(0.250000),
3536
})

0 commit comments

Comments
 (0)