Skip to content

Commit b23bbf0

Browse files
Fix messages format for text-only chats (#207)
1 parent 20701c0 commit b23bbf0

File tree

3 files changed

+59
-41
lines changed

3 files changed

+59
-41
lines changed

Applications/VLMEval/ContentView.swift

+23 -14
Original file line number | Diff line number | Diff line change
@@ -395,25 +395,34 @@ class VLMEvaluator {
395395
} else {
396396
[]
397397
}
398-
var userInput = UserInput(
399-
messages: [
398+
let messages: [[String: Any]] =
399+
if !images.isEmpty || !videos.isEmpty {
400400
[
401-
"role": "user",
402-
"content": [
403-
["type": "text", "text": prompt]
401+
[
402+
"role": "user",
403+
"content": [
404+
["type": "text", "text": prompt]
405+
]
406+
// Messages format for Qwen 2 VL, Qwen 2.5 VL. May need to be adapted for other models.
407+
+ images.map { _ in
408+
["type": "image"]
409+
}
410+
+ videos.map { _ in
411+
["type": "video"]
412+
},
404413
]
405-
+ images.map { _ in
406-
["type": "image"]
407-
}
408-
+ videos.map { _ in
409-
["type": "video"]
410-
},
411414
]
412-
], images: images, videos: videos)
415+
} else {
416+
[
417+
[
418+
"role": "user",
419+
"content": prompt,
420+
]
421+
]
422+
}
423+
var userInput = UserInput(messages: messages, images: images, videos: videos)
413424
userInput.processing.resize = .init(width: 448, height: 448)
414-
415425
let input = try await context.processor.prepare(input: userInput)
416-
417426
return try MLXLMCommon.generate(
418427
input: input,
419428
parameters: generateParameters,

Tools/llm-tool/LLMTool.swift

+23 -22
Original file line number | Diff line number | Diff line change
@@ -216,28 +216,30 @@ struct EvaluateCommand: AsyncParsableCommand {
216216
let prompt =
217217
(try? generate.resolvePrompt(configuration: modelConfiguration))
218218
?? modelConfiguration.defaultPrompt
219-
220219
let images = image.map { UserInput.Image.url($0) }
221220
let videos = video.map { UserInput.Video.url($0) }
222-
223-
let messages: [[String: Any]] = [
224-
[
225-
"role": "user",
226-
"content": [
227-
["type": "text", "text": prompt]
221+
let messages: [[String: Any]] =
222+
if !images.isEmpty || !videos.isEmpty {
223+
[
224+
[
225+
"role": "user",
226+
"content": [
227+
["type": "text", "text": prompt]
228+
]
229+
// Messages format for Qwen 2 VL, Qwen 2.5 VL. May need to be adapted for other models.
230+
+ images.map { _ in ["type": "image"] }
231+
+ videos.map { _ in ["type": "video"] },
232+
]
228233
]
229-
// Messages format for Qwen 2 VL, Qwen 2.5 VL. May need to be adapted for other models.
230-
+ images.map { _ in ["type": "image"] }
231-
+ videos.map { _ in ["type": "video"] },
232-
]
233-
]
234-
235-
var input = UserInput(
236-
messages: messages,
237-
images: images,
238-
videos: videos
239-
)
240-
234+
} else {
235+
[
236+
[
237+
"role": "user",
238+
"content": prompt,
239+
]
240+
]
241+
}
242+
var userInput = UserInput(messages: messages, images: images, videos: videos)
241243
if !resize.isEmpty {
242244
let size: CGSize
243245
if resize.count == 1 {
@@ -249,10 +251,9 @@ struct EvaluateCommand: AsyncParsableCommand {
249251
let v1 = resize[1]
250252
size = CGSize(width: v0, height: v1)
251253
}
252-
input.processing.resize = size
254+
userInput.processing.resize = size
253255
}
254-
255-
return input
256+
return userInput
256257
}
257258

258259
@MainActor

mlx-swift-examples.xcodeproj/xcshareddata/xcschemes/llm-tool.xcscheme

+13 -5
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,14 @@
5555
argument = "--model mlx-community/CodeLlama-13b-Instruct-hf-4bit-MLX"
5656
isEnabled = "NO">
5757
</CommandLineArgument>
58+
<CommandLineArgument
59+
argument = "--prompt &apos;Describe the image in English.&apos; --image https://www.gstatic.com/webp/gallery/1.webp"
60+
isEnabled = "NO">
61+
</CommandLineArgument>
62+
<CommandLineArgument
63+
argument = "--model mlx-community/Qwen2-VL-2B-Instruct-4bit"
64+
isEnabled = "NO">
65+
</CommandLineArgument>
5866
<CommandLineArgument
5967
argument = "--repetition-penalty 1.2"
6068
isEnabled = "NO">
@@ -79,18 +87,18 @@
7987
argument = "--prompt &apos;def quick_sort(arr, left=None, right=None):&apos;"
8088
isEnabled = "NO">
8189
</CommandLineArgument>
90+
<CommandLineArgument
91+
argument = "--prompt &apos;Why is the sky blue?&apos;"
92+
isEnabled = "YES">
93+
</CommandLineArgument>
8294
<CommandLineArgument
8395
argument = "--model mlx-community/Mistral-7B-v0.1-hf-4bit-mlx"
8496
isEnabled = "NO">
8597
</CommandLineArgument>
8698
<CommandLineArgument
87-
argument = "--prompt &apos;Describe the image in English.&apos; --image https://www.gstatic.com/webp/gallery/1.webp"
99+
argument = "--model mlx-community/Llama-3.2-1B-Instruct-4bit"
88100
isEnabled = "YES">
89101
</CommandLineArgument>
90-
<CommandLineArgument
91-
argument = "--model mlx-community/quantized-gemma-2b-it"
92-
isEnabled = "NO">
93-
</CommandLineArgument>
94102
<CommandLineArgument
95103
argument = "--model mlx-community/phi-2-hf-4bit-mlx"
96104
isEnabled = "NO">

0 commit comments

Comments
 (0)