-
Notifications
You must be signed in to change notification settings - Fork 89
Demo Branch #111
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Demo Branch #111
Changes from all commits
f70c968
04fd51e
d0a2e8b
c7aca64
41b1958
ac43175
800343c
021d848
5eb50ba
dbee7ab
d001f59
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
// | ||
// RealTimeAPIDemoView.swift | ||
// SwiftOpenAIExample | ||
// | ||
// Created by James Rochabrun on 1/18/25. | ||
// | ||
|
||
import SwiftUI | ||
import AVFoundation | ||
import SwiftOpenAI | ||
|
||
/// Demo screen for the Realtime API: gates the session controls behind
/// microphone permission and lets the user start/stop a realtime session.
struct RealTimeAPIDemoView: View {

  /// Drives the realtime session; owned by this view.
  @State private var realTimeAPIViewModel: RealTimeAPIViewModel
  /// Cached microphone permission used to choose which subview to show.
  @State private var microphonePermission: AVAudioSession.RecordPermission

  init(service: OpenAIService) {
    realTimeAPIViewModel = .init(service: service)
    // TODO: Update this with latest API.
    // NOTE(review): `AVAudioSession.recordPermission` is deprecated on iOS 17+;
    // consider migrating to `AVAudioApplication.shared.recordPermission`.
    _microphonePermission = State(initialValue: AVAudioSession.sharedInstance().recordPermission)
  }

  var body: some View {
    Group {
      switch microphonePermission {
      case .undetermined:
        requestPermissionButton
      case .denied:
        deniedPermissionView
      case .granted:
        actionButtons
      // Fixed: `RecordPermission` is a non-frozen framework enum, so use
      // `@unknown default` to keep the switch future-proof without losing
      // exhaustiveness checking.
      @unknown default:
        Text("Unknown permission state")
      }
    }
    .onAppear {
      updateMicrophonePermission()
    }
  }

  /// Start/stop controls, shown once microphone permission is granted.
  private var actionButtons: some View {
    VStack(spacing: 40) {
      startSessionButton
      endSessionButton
    }
  }

  private var startSessionButton: some View {
    Button {
      Task {
        await realTimeAPIViewModel.testOpenAIRealtime()
      }
    } label: {
      // Fixed: "microphone" only exists in SF Symbols 5 (iOS 17+); "mic"
      // is available everywhere and matches "mic.slash" used below.
      Label("Start session", systemImage: "mic")
    }
  }

  // Fixed: was `public`, inconsistent with every sibling member of this
  // internal view; nothing outside the view uses it.
  private var endSessionButton: some View {
    Button {
      Task {
        await realTimeAPIViewModel.disconnect()
      }
    } label: {
      Label("Stop session", systemImage: "stop")
    }
  }

  /// Shown while permission is `.undetermined`; triggers the system prompt.
  private var requestPermissionButton: some View {
    Button {
      requestMicrophonePermission()
    } label: {
      Label("Allow microphone access", systemImage: "mic.slash")
    }
  }

  /// Shown when the user has denied access; deep-links to the app's Settings page.
  private var deniedPermissionView: some View {
    VStack(spacing: 12) {
      Image(systemName: "mic.slash.circle")
        .font(.largeTitle)
        .foregroundColor(.red)

      Text("Microphone access is required")
        .font(.headline)

      Button("Open Settings") {
        if let settingsUrl = URL(string: UIApplication.openSettingsURLString) {
          UIApplication.shared.open(settingsUrl)
        }
      }
    }
  }

  /// Re-reads the permission on appear, in case the user changed it in Settings.
  private func updateMicrophonePermission() {
    microphonePermission = AVAudioSession.sharedInstance().recordPermission
  }

  /// Presents the system microphone prompt and mirrors the result into state.
  private func requestMicrophonePermission() {
    AVAudioSession.sharedInstance().requestRecordPermission { granted in
      // The completion handler is not guaranteed to run on the main thread.
      DispatchQueue.main.async {
        microphonePermission = granted ? .granted : .denied
      }
    }
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
// | ||
// RealTimeAPIViewModel.swift | ||
// SwiftOpenAI | ||
// | ||
// Created by James Rochabrun on 1/18/25. | ||
// | ||
|
||
import AVFoundation | ||
import Foundation | ||
import SwiftOpenAI | ||
|
||
/// Spike/demo view model that wires the microphone to an OpenAI realtime
/// session: streams PCM16 audio up and plays PCM16 audio deltas back.
@Observable
final class RealTimeAPIViewModel {

  let service: OpenAIService

  init(service: OpenAIService) {
    self.service = service
  }

  // Retained so the microphone keeps vending samples and the websocket stays
  // open for the lifetime of the session; `disconnect()` tears the session down.
  var kMicrophoneSampleVendor: MicrophonePCMSampleVendor?
  var kRealtimeSession: OpenAIRealtimeSession?

  @RealtimeActor
  func disconnect() {
    kRealtimeSession?.disconnect()
  }

  @RealtimeActor
  func testOpenAIRealtime() async {
    let sessionConfiguration = OpenAIRealtimeSessionUpdate.SessionConfiguration(
      inputAudioFormat: "pcm16",
      inputAudioTranscription: .init(model: "whisper-1"),
      instructions: "You are tour guide for Monument Valley, Utah",
      maxResponseOutputTokens: .int(4096),
      modalities: ["audio", "text"],
      outputAudioFormat: "pcm16",
      temperature: 0.7,
      turnDetection: .init(prefixPaddingMs: 200, silenceDurationMs: 500, threshold: 0.5),
      voice: "shimmer"
    )

    let microphoneSampleVendor = MicrophonePCMSampleVendor()
    let audioStream: AsyncStream<AVAudioPCMBuffer>
    do {
      audioStream = try microphoneSampleVendor.start(useVoiceProcessing: true)
    } catch {
      // Fixed: was `fatalError` — a denied/busy microphone is a recoverable
      // runtime condition, not a programmer error; don't crash the app.
      print("Could not start audio stream: \(error.localizedDescription)")
      return
    }

    let realtimeSession: OpenAIRealtimeSession
    do {
      realtimeSession = try await service.realTimeSession(
        sessionConfiguration: sessionConfiguration
      )
    } catch {
      // Fixed: was `fatalError` — a network/auth failure must not crash.
      // Stop the microphone we just started so it isn't leaked.
      microphoneSampleVendor.stop()
      print("Could not create an OpenAI realtime session: \(error.localizedDescription)")
      return
    }

    // NOTE(review): this flag is mutated from the receiver task and read from
    // the streaming task. Both unstructured Tasks inherit @RealtimeActor
    // isolation from this method, which should serialize access — confirm this
    // holds under Swift 6 strict concurrency.
    var isOpenAIReadyForAudio = false
    Task {
      // Fixed: `sendMessage` throws; without a catch, the first send failure
      // silently killed the streaming loop with no diagnostic.
      do {
        for await buffer in audioStream {
          if isOpenAIReadyForAudio, let base64Audio = AudioUtils.base64EncodeAudioPCMBuffer(from: buffer) {
            try await realtimeSession.sendMessage(
              OpenAIRealtimeInputAudioBufferAppend(audio: base64Audio)
            )
          }
        }
      } catch {
        print("Could not stream microphone audio: \(error.localizedDescription)")
      }
      print("Done streaming microphone audio")
    }

    Task {
      do {
        print("Sending response create")
        try await realtimeSession.sendMessage(OpenAIRealtimeResponseCreate())
      } catch {
        print("Could not send the session configuration instructions")
      }
    }

    Task {
      for await message in realtimeSession.receiver {
        switch message {
        case .sessionUpdated:
          // Only start uploading audio once OpenAI acknowledges the session.
          isOpenAIReadyForAudio = true
        case .responseAudioDelta(let base64Audio):
          InternalAudioPlayer.playPCM16Audio(from: base64Audio)
        default:
          break
        }
      }
      print("Done listening for messages from OpenAI")
    }

    // Some time later
    // microphoneSampleVendor.stop()

    kMicrophoneSampleVendor = microphoneSampleVendor
    kRealtimeSession = realtimeSession
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
RealTimeAPIViewModel and RealTimeAPIDemoView are how I try to test this. All the code has been copied from the demo branch.