@@ -13,10 +13,19 @@ public typealias Message = [String: Any]
13
13
/// A ``UserInputProcessor`` can convert this to ``LMInput``.
14
14
/// See also ``ModelContext``.
15
15
public struct UserInput : Sendable {
16
+
16
17
/// Representation of a prompt or series of messages (conversation).
18
+ ///
19
+ /// This may be a single string with a user prompt or a series of back
20
+ /// and forth responses representing a conversation.
17
21
public enum Prompt : Sendable , CustomStringConvertible {
22
+ /// a single string
18
23
case text( String )
24
+
25
+ /// model specific array of dictionaries
19
26
case messages( [ Message ] )
27
+
28
+ /// model agnostic structured chat (series of messages)
20
29
case chat( [ Chat . Message ] )
21
30
22
31
public var description : String {
@@ -31,6 +40,7 @@ public struct UserInput: Sendable {
31
40
}
32
41
}
33
42
43
+ /// Representation of a video resource.
34
44
public enum Video : Sendable {
35
45
case avAsset( AVAsset )
36
46
case url( URL )
@@ -45,7 +55,7 @@ public struct UserInput: Sendable {
45
55
}
46
56
}
47
57
48
- /// Representation of a single image.
58
+ /// Representation of an image resource.
49
59
public enum Image : Sendable {
50
60
case ciImage( CIImage )
51
61
case url( URL )
@@ -118,60 +128,196 @@ public struct UserInput: Sendable {
118
128
}
119
129
}
120
130
131
+ /// The prompt to evaluate.
121
132
public var prompt : Prompt
122
- public var images = [ Image] ( )
123
- public var videos = [ Video] ( )
133
+
134
+ /// The images associated with the `UserInput`.
135
+ ///
136
+ /// If the ``prompt-swift.property`` is a ``Prompt-swift.enum/chat(_:)`` this will
137
+ /// collect the images from the chat messages, otherwise these are the stored images with the ``UserInput``.
138
+ public var images : [ Image ] {
139
+ get {
140
+ switch prompt {
141
+ case . text: _images
142
+ case . messages: _images
143
+ case . chat( let messages) :
144
+ messages. reduce ( into: [ ] ) { result, message in
145
+ result. append ( contentsOf: message. images)
146
+ }
147
+ }
148
+ }
149
+ set {
150
+ switch prompt {
151
+ case . text, . messages:
152
+ _images = newValue
153
+ case . chat:
154
+ break
155
+ }
156
+ }
157
+ }
158
+
159
+ private var _images = [ Image] ( )
160
+
161
+ /// The videos associated with the `UserInput`.
162
+ ///
163
+ /// If the ``prompt-swift.property`` is a ``Prompt-swift.enum/chat(_:)`` this will
164
+ /// collect the videos from the chat messages, otherwise these are the stored videos with the ``UserInput``.
165
+ public var videos : [ Video ] {
166
+ get {
167
+ switch prompt {
168
+ case . text: _videos
169
+ case . messages: _videos
170
+ case . chat( let messages) :
171
+ messages. reduce ( into: [ ] ) { result, message in
172
+ result. append ( contentsOf: message. videos)
173
+ }
174
+ }
175
+ }
176
+ set {
177
+ switch prompt {
178
+ case . text, . messages:
179
+ _videos = newValue
180
+ case . chat:
181
+ break
182
+ }
183
+ }
184
+ }
185
+
186
+ private var _videos = [ Video] ( )
187
+
124
188
public var tools : [ ToolSpec ] ?
189
+
125
190
/// Additional values provided for the chat template rendering context
126
191
public var additionalContext : [ String : Any ] ?
127
192
public var processing : Processing = . init( )
128
193
194
+ /// Initialize the `UserInput` with a single text prompt.
195
+ ///
196
+ /// - Parameters:
197
+ /// - prompt: text prompt
198
+ /// - images: optional images
199
+ /// - videos: optional videos
200
+ /// - tools: optional tool specifications
201
+ /// - additionalContext: optional context (model specific)
202
+ /// ### See Also
203
+ /// - ``Prompt-swift.enum/text(_:)``
204
+ /// - ``init(chat:tools:additionalContext:)``
129
205
public init (
130
206
prompt: String , images: [ Image ] = [ Image] ( ) , videos: [ Video ] = [ Video] ( ) ,
131
207
tools: [ ToolSpec ] ? = nil ,
132
208
additionalContext: [ String : Any ] ? = nil
133
209
) {
134
- self . prompt = . text ( prompt )
135
- self . images = images
136
- self . videos = videos
210
+ self . prompt = . chat ( [
211
+ . user ( prompt , images: images, videos : videos )
212
+ ] )
137
213
self . tools = tools
138
214
self . additionalContext = additionalContext
139
215
}
140
216
217
+ /// Initialize the `UserInput` with model specific message structures.
218
+ ///
219
+ /// For example, the Qwen2VL model wants input in this format:
220
+ ///
221
+ /// ```
222
+ /// [
223
+ /// [
224
+ /// "role": "user",
225
+ /// "content": [
226
+ /// [
227
+ /// "type": "text",
228
+ /// "text": "What is this?"
229
+ /// ],
230
+ /// [
231
+ /// "type": "image",
232
+ /// ],
233
+ /// ]
234
+ /// ]
235
+ /// ]
236
+ /// ```
237
+ ///
238
+ /// Typically the ``init(chat:tools:additionalContext:)`` should be used instead
239
+ /// along with a model specific ``MessageGenerator`` (supplied by the ``UserInputProcessor``).
240
+ ///
241
+ /// - Parameters:
242
+ /// - messages: array of dictionaries representing the prompt in a model specific format
243
+ /// - images: optional images
244
+ /// - videos: optional videos
245
+ /// - tools: optional tool specifications
246
+ /// - additionalContext: optional context (model specific)
247
+ /// ### See Also
248
+ /// - ``Prompt-swift.enum/messages(_:)``
249
+ /// - ``init(chat:tools:additionalContext:)``
141
250
public init (
142
251
messages: [ Message ] , images: [ Image ] = [ Image] ( ) , videos: [ Video ] = [ Video] ( ) ,
143
252
tools: [ ToolSpec ] ? = nil ,
144
253
additionalContext: [ String : Any ] ? = nil
145
254
) {
146
255
self . prompt = . messages( messages)
147
- self . images = images
148
- self . videos = videos
149
256
self . tools = tools
150
257
self . additionalContext = additionalContext
151
258
}
152
259
260
+ /// Initialize the `UserInput` with a model agnostic structured context.
261
+ ///
262
+ /// For example:
263
+ ///
264
+ /// ```
265
+ /// let chat: [Chat.Message] = [
266
+ /// .system("You are a helpful photographic assistant."),
267
+ /// .user("Please describe the photo.", images: [image1]),
268
+ /// ]
269
+ /// let userInput = UserInput(chat: chat)
270
+ /// ```
271
+ ///
272
+ /// A model specific ``MessageGenerator`` (supplied by the ``UserInputProcessor``)
273
+ /// is used to convert this into a model specific format.
274
+ ///
275
+ /// - Parameters:
276
+ /// - chat: structured content
277
+ /// - tools: optional tool specifications
278
+ /// - additionalContext: optional context (model specific)
279
+ /// ### See Also
280
+ /// - ``Prompt-swift.enum/chat(_:)``
281
+ /// - ``init(chat:tools:additionalContext:)``
153
282
public init (
154
- messages : [ Chat . Message ] ,
283
+ chat : [ Chat . Message ] ,
155
284
tools: [ ToolSpec ] ? = nil ,
156
285
additionalContext: [ String : Any ] ? = nil
157
286
) {
158
- self . prompt = . chat( messages)
159
- self . images = messages. reduce ( into: [ ] ) { result, message in
160
- result. append ( contentsOf: message. images)
161
- }
162
- self . videos = messages. reduce ( into: [ ] ) { result, message in
163
- result. append ( contentsOf: message. videos)
164
- }
287
+ self . prompt = . chat( chat)
165
288
self . tools = tools
166
289
self . additionalContext = additionalContext
167
290
}
168
291
292
+ /// Initialize the `UserInput` with a preconfigured ``Prompt-swift.enum``.
293
+ ///
294
+ /// ``init(chat:tools:additionalContext:)`` is the preferred mechanism.
295
+ ///
296
+ /// - Parameters:
297
+ /// - prompt: the prompt
298
+ /// - images: optional images
299
+ /// - videos: optional videos
300
+ /// - tools: optional tool specifications
301
+ /// - processing: optional processing to be applied to media
302
+ /// - additionalContext: optional context (model specific)
303
+ /// ### See Also
304
+ /// - ``Prompt-swift.enum/text(_:)``
305
+ /// - ``init(chat:tools:additionalContext:)``
169
306
public init (
170
- prompt: Prompt , images: [ Image ] = [ Image] ( ) , processing: Processing = . init( ) ,
307
+ prompt: Prompt ,
308
+ images: [ Image ] = [ Image] ( ) ,
309
+ videos: [ Video ] = [ Video] ( ) ,
310
+ processing: Processing = . init( ) ,
171
311
tools: [ ToolSpec ] ? = nil , additionalContext: [ String : Any ] ? = nil
172
312
) {
173
313
self . prompt = prompt
174
- self . images = images
314
+ switch prompt {
315
+ case . text, . messages:
316
+ _images = images
317
+ _videos = videos
318
+ case . chat:
319
+ break
320
+ }
175
321
self . processing = processing
176
322
self . tools = tools
177
323
self . additionalContext = additionalContext
0 commit comments