diff --git a/.circleci/config.yml b/.circleci/config.yml
index 8ecd4bc4..5bb3e2d5 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -28,6 +28,14 @@ jobs:
             brew install swift-format
             pre-commit run --all
             if ! git diff --quiet; then echo 'Style checks failed, please install pre-commit and run pre-commit run --all and push the change'; exit 1; fi
+      - run:
+          name: Run Tests (Xcode, macOS)
+          command: |
+            xcodebuild -version
+            xcrun --show-sdk-build-version
+            swift --version
+            find . -name Package.resolved -exec rm {} \;
+            xcodebuild test -scheme mlx-libraries-Package -destination 'platform=OS X'
       - run:
           name: Build Examples
           command: |
diff --git a/Applications/VLMEval/ContentView.swift b/Applications/VLMEval/ContentView.swift
index 5b9dbfc9..76133dc5 100644
--- a/Applications/VLMEval/ContentView.swift
+++ b/Applications/VLMEval/ContentView.swift
@@ -412,13 +412,22 @@ class VLMEvaluator {
             if !images.isEmpty || !videos.isEmpty {
                 [
                     [
-                        "role": "user",
+                        "role": "system",
                         "content": [
                             [
                                 "type": "text",
                                 "text": videoURL != nil ? videoSystemPrompt : imageSystemPrompt,
                             ]
+                        ],
+                    ],
+                    [
+                        "role": "user",
+                        "content": [
+                            [
+                                "type": "text",
+                                "text": prompt,
+                            ]
                         ]
                         // Messages format for Qwen 2 VL, Qwen 2.5 VL. May need to be adapted for other models.
                         + images.map { _ in
@@ -427,7 +436,7 @@ class VLMEvaluator {
                         + videos.map { _ in ["type": "video"] },
-                    ]
+                    ],
                 ]
             } else {
                 [
diff --git a/Libraries/MLXLLM/LLMModelFactory.swift b/Libraries/MLXLLM/LLMModelFactory.swift
index 4504889b..c6fe7c50 100644
--- a/Libraries/MLXLLM/LLMModelFactory.swift
+++ b/Libraries/MLXLLM/LLMModelFactory.swift
@@ -199,23 +199,29 @@ private struct LLMUserInputProcessor: UserInputProcessor {
 
     let tokenizer: Tokenizer
     let configuration: ModelConfiguration
+    let messageGenerator: MessageGenerator
 
-    internal init(tokenizer: any Tokenizer, configuration: ModelConfiguration) {
+    internal init(
+        tokenizer: any Tokenizer, configuration: ModelConfiguration,
+        messageGenerator: MessageGenerator
+    ) {
         self.tokenizer = tokenizer
         self.configuration = configuration
+        self.messageGenerator = messageGenerator
     }
 
     func prepare(input: UserInput) throws -> LMInput {
+        let messages = messageGenerator.generate(from: input)
+
         do {
-            let messages = input.prompt.asMessages()
             let promptTokens = try tokenizer.applyChatTemplate(
                 messages: messages, tools: input.tools,
                 additionalContext: input.additionalContext)
             return LMInput(tokens: MLXArray(promptTokens))
         } catch {
             // #150 -- it might be a TokenizerError.chatTemplate("No chat template was specified")
             // but that is not public so just fall back to text
-            let prompt = input.prompt
-                .asMessages()
+            let prompt =
+                messages
                 .compactMap { $0["content"] as? String }
                 .joined(separator: ". ")
             let promptTokens = tokenizer.encode(text: prompt)
@@ -273,7 +279,9 @@ public class LLMModelFactory: ModelFactory {
         return .init(
             configuration: configuration, model: model,
-            processor: LLMUserInputProcessor(tokenizer: tokenizer, configuration: configuration),
+            processor: LLMUserInputProcessor(
+                tokenizer: tokenizer, configuration: configuration,
+                messageGenerator: DefaultMessageGenerator()),
             tokenizer: tokenizer)
     }
diff --git a/Libraries/MLXLMCommon/Chat.swift b/Libraries/MLXLMCommon/Chat.swift
new file mode 100644
index 00000000..029bf83f
--- /dev/null
+++ b/Libraries/MLXLMCommon/Chat.swift
@@ -0,0 +1,114 @@
+// Copyright © 2025 Apple Inc.
+
+public enum Chat {
+    public struct Message {
+        /// The role of the message sender.
+        public var role: Role
+
+        /// The content of the message.
+        public var content: String
+
+        /// Array of image data associated with the message.
+        public var images: [UserInput.Image]
+
+        /// Array of video data associated with the message.
+        public var videos: [UserInput.Video]
+
+        public init(
+            role: Role, content: String, images: [UserInput.Image] = [],
+            videos: [UserInput.Video] = []
+        ) {
+            self.role = role
+            self.content = content
+            self.images = images
+            self.videos = videos
+        }
+
+        public static func system(
+            _ content: String, images: [UserInput.Image] = [], videos: [UserInput.Video] = []
+        ) -> Self {
+            Self(role: .system, content: content, images: images, videos: videos)
+        }
+
+        public static func assistant(
+            _ content: String, images: [UserInput.Image] = [], videos: [UserInput.Video] = []
+        ) -> Self {
+            Self(role: .assistant, content: content, images: images, videos: videos)
+        }
+
+        public static func user(
+            _ content: String, images: [UserInput.Image] = [], videos: [UserInput.Video] = []
+        ) -> Self {
+            Self(role: .user, content: content, images: images, videos: videos)
+        }
+
+        public enum Role: String {
+            case user
+            case assistant
+            case system
+        }
+    }
+}
+
+/// Protocol for something that can convert structured
+/// ``Chat.Message`` into model specific ``Message``
+/// (raw dictionary) format.
+///
+/// Typically this is owned and used by a ``UserInputProcessor``:
+///
+/// ```swift
+/// public func prepare(input: UserInput) async throws -> LMInput {
+///     let messages = Qwen2VLMessageGenerator().generate(from: input)
+///     ...
+/// ```
+public protocol MessageGenerator {
+
+    /// Returns `[String: Any]` aka ``Message``.
+    func generate(message: Chat.Message) -> Message
+}
+
+extension MessageGenerator {
+    /// Returns array of `[String: Any]` aka ``Message``
+    public func generate(messages: [Chat.Message]) -> [Message] {
+        var rawMessages: [Message] = []
+
+        for message in messages {
+            let raw = generate(message: message)
+            rawMessages.append(raw)
+        }
+
+        return rawMessages
+    }
+
+    /// Generates messages from the input.
+    public func generate(from input: UserInput) -> [Message] {
+        switch input.prompt {
+        case .text(let text):
+            generate(messages: [.user(text)])
+        case .messages(let messages):
+            messages
+        case .chat(let messages):
+            generate(messages: messages)
+        }
+    }
+}
+
+/// Default implementation of ``MessageGenerator`` that produces a
+/// `role` and `content`.
+///
+/// ```swift
+/// [
+///     "role": message.role.rawValue,
+///     "content": message.content,
+/// ]
+/// ```
+public struct DefaultMessageGenerator: MessageGenerator {
+    public init() {}
+
+    public func generate(message: Chat.Message) -> Message {
+        [
+            "role": message.role.rawValue,
+            "content": message.content,
+        ]
+    }
+}
diff --git a/Libraries/MLXLMCommon/UserInput.swift b/Libraries/MLXLMCommon/UserInput.swift
index 6b26bccb..e94600bc 100644
--- a/Libraries/MLXLMCommon/UserInput.swift
+++ b/Libraries/MLXLMCommon/UserInput.swift
@@ -13,19 +13,20 @@ public typealias Message = [String: Any]
 /// A ``UserInputProcessor`` can convert this to ``LMInput``.
 /// See also ``ModelContext``.
 public struct UserInput: Sendable {
+    /// Representation of a prompt or series of messages (conversation).
+    ///
+    /// This may be a single string with a user prompt or a series of back
+    /// and forth responses representing a conversation.
     public enum Prompt: Sendable, CustomStringConvertible {
+        /// a single string
         case text(String)
+
+        /// model specific array of dictionaries
         case messages([Message])
 
-        public func asMessages() -> [Message] {
-            switch self {
-            case .text(let text):
-                return [["role": "user", "content": text]]
-            case .messages(let messages):
-                return messages
-            }
-        }
+        /// model agnostic structured chat (series of messages)
+        case chat([Chat.Message])
 
         public var description: String {
             switch self {
@@ -33,10 +34,13 @@ public struct UserInput: Sendable {
                 return text
             case .messages(let messages):
                 return messages.map { $0.description }.joined(separator: "\n")
+            case .chat(let messages):
+                return messages.map(\.content).joined(separator: "\n")
             }
         }
     }
 
+    /// Representation of a video resource.
     public enum Video: Sendable {
         case avAsset(AVAsset)
         case url(URL)
@@ -51,7 +55,7 @@
         }
     }
 
-    /// Representation of a single image.
+    /// Representation of an image resource.
     public enum Image: Sendable {
         case ciImage(CIImage)
         case url(URL)
@@ -124,44 +128,178 @@
         }
     }
 
-    public var prompt: Prompt
+    /// The prompt to evaluate.
+    public var prompt: Prompt {
+        didSet {
+            switch prompt {
+            case .text, .messages:
+                // no action
+                break
+            case .chat(let messages):
+                // rebuild images & videos
+                self.images = messages.reduce(into: []) { result, message in
+                    result.append(contentsOf: message.images)
+                }
+                self.videos = messages.reduce(into: []) { result, message in
+                    result.append(contentsOf: message.videos)
+                }
+            }
+        }
+    }
+
+    /// The images associated with the `UserInput`.
+    ///
+    /// If the ``prompt-swift.property`` is a ``Prompt-swift.enum/chat(_:)`` this will
+    /// collect the images from the chat messages, otherwise these are the stored images with the ``UserInput``.
     public var images = [Image]()
+
+    /// The videos associated with the `UserInput`.
+    ///
+    /// If the ``prompt-swift.property`` is a ``Prompt-swift.enum/chat(_:)`` this will
+    /// collect the videos from the chat messages, otherwise these are the stored videos with the ``UserInput``.
     public var videos = [Video]()
+
     public var tools: [ToolSpec]?
+
+    /// Additional values provided for the chat template rendering context
     public var additionalContext: [String: Any]?
+
     public var processing: Processing = .init()
 
+    /// Initialize the `UserInput` with a single text prompt.
+    ///
+    /// - Parameters:
+    ///   - prompt: text prompt
+    ///   - images: optional images
+    ///   - videos: optional videos
+    ///   - tools: optional tool specifications
+    ///   - additionalContext: optional context (model specific)
+    /// ### See Also
+    /// - ``Prompt-swift.enum/text(_:)``
+    /// - ``init(chat:tools:additionalContext:)``
     public init(
         prompt: String, images: [Image] = [Image](), videos: [Video] = [Video](),
         tools: [ToolSpec]? = nil,
         additionalContext: [String: Any]? = nil
     ) {
-        self.prompt = .text(prompt)
-        self.images = images
-        self.videos = videos
+        self.prompt = .chat([
+            .user(prompt, images: images, videos: videos)
+        ])
         self.tools = tools
         self.additionalContext = additionalContext
     }
 
+    /// Initialize the `UserInput` with model specific message structures.
+    ///
+    /// For example, the Qwen2VL model wants input in this format:
+    ///
+    /// ```
+    /// [
+    ///     [
+    ///         "role": "user",
+    ///         "content": [
+    ///             [
+    ///                 "type": "text",
+    ///                 "text": "What is this?"
+    ///             ],
+    ///             [
+    ///                 "type": "image",
+    ///             ],
+    ///         ]
+    ///     ]
+    /// ]
+    /// ```
+    ///
+    /// Typically the ``init(chat:tools:additionalContext:)`` should be used instead
+    /// along with a model specific ``MessageGenerator`` (supplied by the ``UserInputProcessor``).
+    ///
+    /// - Parameters:
+    ///   - messages: array of dictionaries representing the prompt in a model specific format
+    ///   - images: optional images
+    ///   - videos: optional videos
+    ///   - tools: optional tool specifications
+    ///   - additionalContext: optional context (model specific)
+    /// ### See Also
+    /// - ``Prompt-swift.enum/text(_:)``
+    /// - ``init(chat:tools:additionalContext:)``
     public init(
         messages: [Message], images: [Image] = [Image](), videos: [Video] = [Video](),
         tools: [ToolSpec]? = nil,
         additionalContext: [String: Any]? = nil
     ) {
         self.prompt = .messages(messages)
-        self.images = images
-        self.videos = videos
         self.tools = tools
         self.additionalContext = additionalContext
     }
 
+    /// Initialize the `UserInput` with a model agnostic structured context.
+    ///
+    /// For example:
+    ///
+    /// ```
+    /// let chat: [Chat.Message] = [
+    ///     .system("You are a helpful photographic assistant."),
+    ///     .user("Please describe the photo.", images: [image1]),
+    /// ]
+    /// let userInput = UserInput(chat: chat)
+    /// ```
+    ///
+    /// A model specific ``MessageGenerator`` (supplied by the ``UserInputProcessor``)
+    /// is used to convert this into a model specific format.
+    ///
+    /// - Parameters:
+    ///   - chat: structured content
+    ///   - tools: optional tool specifications
+    ///   - additionalContext: optional context (model specific)
+    /// ### See Also
+    /// - ``Prompt-swift.enum/text(_:)``
+    /// - ``init(chat:tools:additionalContext:)``
+    public init(
+        chat: [Chat.Message],
+        tools: [ToolSpec]? = nil,
+        additionalContext: [String: Any]? = nil
+    ) {
+        self.prompt = .chat(chat)
+
+        // note: prompt.didSet is not triggered in init
+        self.images = chat.reduce(into: []) { result, message in
+            result.append(contentsOf: message.images)
+        }
+        self.videos = chat.reduce(into: []) { result, message in
+            result.append(contentsOf: message.videos)
+        }
+        self.tools = tools
+        self.additionalContext = additionalContext
+    }
+
+    /// Initialize the `UserInput` with a preconfigured ``Prompt-swift.enum``.
+    ///
+    /// ``init(chat:tools:additionalContext:)`` is the preferred mechanism.
+    ///
+    /// - Parameters:
+    ///   - prompt: the prompt
+    ///   - images: optional images
+    ///   - videos: optional videos
+    ///   - tools: optional tool specifications
+    ///   - processing: optional processing to be applied to media
+    ///   - additionalContext: optional context (model specific)
+    /// ### See Also
+    /// - ``Prompt-swift.enum/text(_:)``
+    /// - ``init(chat:tools:additionalContext:)``
     public init(
-        prompt: Prompt, images: [Image] = [Image](), processing: Processing = .init(),
+        prompt: Prompt,
+        images: [Image] = [Image](),
+        videos: [Video] = [Video](),
+        processing: Processing = .init(),
         tools: [ToolSpec]? = nil, additionalContext: [String: Any]? = nil
     ) {
         self.prompt = prompt
-        self.images = images
+        switch prompt {
+        case .text, .messages:
+            self.images = images
+            self.videos = videos
+        case .chat:
+            break
+        }
         self.processing = processing
         self.tools = tools
         self.additionalContext = additionalContext
diff --git a/Libraries/MLXVLM/Models/Idefics3.swift b/Libraries/MLXVLM/Models/Idefics3.swift
index 0a63e87c..ce769b75 100644
--- a/Libraries/MLXVLM/Models/Idefics3.swift
+++ b/Libraries/MLXVLM/Models/Idefics3.swift
@@ -817,10 +817,19 @@ public class Idefics3Processor: UserInputProcessor {
         self.tokenizer = tokenizer
     }
 
-    public func prepare(input: UserInput) throws -> LMInput {
-
-        let prompt = input.prompt.asMessages().last?["content"] as? String ?? ""
+    private func prompt(from userInput: UserInput) -> String {
+        switch userInput.prompt {
+        case .text(let text):
+            text
+        case .messages(let messages):
+            messages.last?["content"] as? String ?? ""
+        case .chat(let messages):
+            messages.last?.content ?? ""
+        }
+    }
 
+    public func prepare(input: UserInput) throws -> LMInput {
+        let prompt = prompt(from: input)
         if input.images.isEmpty {
             // No image scenario
             let tokens = try tokenizer.encode(text: prompt)
diff --git a/Libraries/MLXVLM/Models/Paligemma.swift b/Libraries/MLXVLM/Models/Paligemma.swift
index c8a46e8f..fce90da4 100644
--- a/Libraries/MLXVLM/Models/Paligemma.swift
+++ b/Libraries/MLXVLM/Models/Paligemma.swift
@@ -478,7 +478,7 @@ public class PaliGemmaProcessor: UserInputProcessor {
         }
 
         // this doesn't have a chat template so just use the last message.
-        var prompt = input.prompt.asMessages().last?["content"] as? String ?? ""
+        var prompt = prompt(from: input)
 
         // based on transformers/processing_paligemma
         let count = input.images.count * config.imageSequenceLength
@@ -495,6 +495,17 @@ public class PaliGemmaProcessor: UserInputProcessor {
         return LMInput(text: .init(tokens: promptArray, mask: mask), image: .init(pixels: pixels))
     }
 
+    private func prompt(from userInput: UserInput) -> String {
+        switch userInput.prompt {
+        case .text(let text):
+            text
+        case .messages(let messages):
+            messages.last?["content"] as? String ?? ""
+        case .chat(let messages):
+            messages.last?.content ?? ""
+        }
+    }
+
 }
 
 // MARK: - Model
diff --git a/Libraries/MLXVLM/Models/Qwen25VL.swift b/Libraries/MLXVLM/Models/Qwen25VL.swift
index 4ddce506..7db8cc58 100644
--- a/Libraries/MLXVLM/Models/Qwen25VL.swift
+++ b/Libraries/MLXVLM/Models/Qwen25VL.swift
@@ -718,7 +718,7 @@ public class Qwen25VLProcessor: UserInputProcessor {
     }
 
     public func prepare(input: UserInput) async throws -> LMInput {
-        let messages = input.prompt.asMessages()
+        let messages = Qwen2VLMessageGenerator().generate(from: input)
         var promptTokens = try tokenizer.applyChatTemplate(messages: messages)
diff --git a/Libraries/MLXVLM/Models/Qwen2VL.swift b/Libraries/MLXVLM/Models/Qwen2VL.swift
index 8b20d576..d9dc9cd8 100644
--- a/Libraries/MLXVLM/Models/Qwen2VL.swift
+++ b/Libraries/MLXVLM/Models/Qwen2VL.swift
@@ -560,7 +560,8 @@ public class Qwen2VLProcessor: UserInputProcessor {
     }
 
     public func prepare(input: UserInput) async throws -> LMInput {
-        let messages = input.prompt.asMessages()
+        let messages = Qwen2VLMessageGenerator().generate(from: input)
+
         var promptTokens = try tokenizer.applyChatTemplate(messages: messages)
 
         // Text-only input
@@ -898,3 +899,24 @@ public struct Qwen2VLProcessorConfiguration: Codable, Sendable {
         case _size = "size"
     }
 }
+
+/// Message Generator for Qwen2VL
+public struct Qwen2VLMessageGenerator: MessageGenerator {
+    public init() {}
+
+    public func generate(message: Chat.Message) -> Message {
+        [
+            "role": message.role.rawValue,
+            "content": [
+                ["type": "text", "text": message.content]
+            ]
+            // Messages format for Qwen 2 VL, Qwen 2.5 VL. May need to be adapted for other models.
+            + message.images.map { _ in
+                ["type": "image"]
+            }
+            + message.videos.map { _ in
+                ["type": "video"]
+            },
+        ]
+    }
+}
diff --git a/Libraries/MLXVLM/Models/SmolVLM2.swift b/Libraries/MLXVLM/Models/SmolVLM2.swift
index 70879ec1..b75a9717 100644
--- a/Libraries/MLXVLM/Models/SmolVLM2.swift
+++ b/Libraries/MLXVLM/Models/SmolVLM2.swift
@@ -221,7 +221,7 @@ public class SmolVLMProcessor: UserInputProcessor {
     }
 
     public func prepare(input: UserInput) async throws -> LMInput {
-        let messages = input.prompt.asMessages()
+        let messages = Qwen2VLMessageGenerator().generate(from: input)  // TODO: Create SmolVLM2MessageGenerator
 
         if input.images.isEmpty && input.videos.isEmpty {
             // No image scenario
diff --git a/Package.swift b/Package.swift
index f0e8bc4a..670b1bd8 100644
--- a/Package.swift
+++ b/Package.swift
@@ -91,6 +91,26 @@ let package = Package(
                 .enableExperimentalFeature("StrictConcurrency")
             ]
         ),
+        .testTarget(
+            name: "MLXLMTests",
+            dependencies: [
+                .product(name: "MLX", package: "mlx-swift"),
+                .product(name: "MLXNN", package: "mlx-swift"),
+                .product(name: "MLXOptimizers", package: "mlx-swift"),
+                .product(name: "MLXRandom", package: "mlx-swift"),
+                .product(name: "Transformers", package: "swift-transformers"),
+                "MLXLMCommon",
+                "MLXLLM",
+                "MLXVLM",
+            ],
+            path: "Tests/MLXLMTests",
+            exclude: [
+                "README.md"
+            ],
+            swiftSettings: [
+                .enableExperimentalFeature("StrictConcurrency")
+            ]
+        ),
         .target(
             name: "MLXEmbedders",
             dependencies: [
diff --git a/Tests/MLXLMTests/README.md b/Tests/MLXLMTests/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/Tests/MLXLMTests/UserInputTests.swift b/Tests/MLXLMTests/UserInputTests.swift
new file mode 100644
index 00000000..f366c6da
--- /dev/null
+++ b/Tests/MLXLMTests/UserInputTests.swift
@@ -0,0 +1,143 @@
+import Foundation
+import MLX
+import MLXLMCommon
+import MLXVLM
+import XCTest
+
+func assertEqual(
+    _ v1: Any, _ v2: Any, path: [String] = [], file: StaticString = #filePath, line: UInt = #line
+) {
+    switch (v1, v2) {
+    case let (v1, v2) as (String, String):
+        XCTAssertEqual(v1, v2, file: file, line: line)
+
+    case let (v1, v2) as ([Any], [Any]):
+        XCTAssertEqual(
+            v1.count, v2.count, "Arrays not equal size at \(path)", file: file, line: line)
+
+        for (index, (v1v, v2v)) in zip(v1, v2).enumerated() {
+            assertEqual(v1v, v2v, path: path + [index.description], file: file, line: line)
+        }
+
+    case let (v1, v2) as ([String: Any], [String: Any]):
+        XCTAssertEqual(
+            v1.keys.sorted(), v2.keys.sorted(),
+            "\(String(describing: v1.keys.sorted())) and \(String(describing: v2.keys.sorted())) not equal at \(path)",
+            file: file, line: line)
+
+        for (k, v1v) in v1 {
+            if let v2v = v2[k] {
+                assertEqual(v1v, v2v, path: path + [k], file: file, line: line)
+            } else {
+                XCTFail("Missing value for \(k) at \(path)", file: file, line: line)
+            }
+        }
+
+    default:
+        XCTFail(
+            "Unable to compare \(String(describing: v1)) and \(String(describing: v2)) at \(path)",
+            file: file, line: line)
+    }
+}
+
+public class UserInputTests: XCTestCase {
+
+    public func testStandardConversion() {
+        let chat: [Chat.Message] = [
+            .system("You are a useful agent."),
+            .user("Tell me a story."),
+        ]
+
+        let messages = DefaultMessageGenerator().generate(messages: chat)
+
+        let expected = [
+            [
+                "role": "system",
+                "content": "You are a useful agent.",
+            ],
+            [
+                "role": "user",
+                "content": "Tell me a story.",
+            ],
+        ]
+
+        XCTAssertEqual(expected, messages as? [[String: String]])
+    }
+
+    public func testQwen2ConversionText() {
+        let chat: [Chat.Message] = [
+            .system("You are a useful agent."),
+            .user("Tell me a story."),
+        ]
+
+        let messages = Qwen2VLMessageGenerator().generate(messages: chat)
+
+        let expected = [
+            [
+                "role": "system",
+                "content": [
+                    [
+                        "type": "text",
+                        "text": "You are a useful agent.",
+                    ]
+                ],
+            ],
+            [
+                "role": "user",
+                "content": [
+                    [
+                        "type": "text",
+                        "text": "Tell me a story.",
+                    ]
+                ],
+            ],
+        ]
+
+        assertEqual(expected, messages)
+    }
+
+    public func testQwen2ConversionImage() {
+        let chat: [Chat.Message] = [
+            .system("You are a useful agent."),
+            .user(
+                "What is this?",
+                images: [
+                    .url(
+                        URL(
+                            string: "https://opensource.apple.com/images/projects/mlx.f5c59d8b.png")!
+                    )
+                ]),
+        ]
+
+        let messages = Qwen2VLMessageGenerator().generate(messages: chat)
+
+        let expected = [
+            [
+                "role": "system",
+                "content": [
+                    [
+                        "type": "text",
+                        "text": "You are a useful agent.",
+                    ]
+                ],
+            ],
+            [
+                "role": "user",
+                "content": [
+                    [
+                        "type": "text",
+                        "text": "What is this?",
+                    ],
+                    [
+                        "type": "image"
+                    ],
+                ],
+            ],
+        ]
+
+        assertEqual(expected, messages)
+
+        let userInput = UserInput(chat: chat)
+        XCTAssertEqual(userInput.images.count, 1)
+    }
+
+}
diff --git a/Tests/mlx-libraries-Package.xctestplan b/Tests/mlx-libraries-Package.xctestplan
new file mode 100644
index 00000000..c9d73ef2
--- /dev/null
+++ b/Tests/mlx-libraries-Package.xctestplan
@@ -0,0 +1,24 @@
+{
+  "configurations" : [
+    {
+      "id" : "CCE21325-5E68-419C-8EB1-B868EF7688F7",
+      "name" : "Test Scheme Action",
+      "options" : {
+
+      }
+    }
+  ],
+  "defaultOptions" : {
+
+  },
+  "testTargets" : [
+    {
+      "target" : {
+        "containerPath" : "container:mlx-swift-examples.xcodeproj",
+        "identifier" : "C3208E6D2DB19451006AE6CA",
+        "name" : "MLXLMTests"
+      }
+    }
+  ],
+  "version" : 1
+}
diff --git a/Tools/llm-tool/LLMTool.swift b/Tools/llm-tool/LLMTool.swift
index 528460fc..2314fe07 100644
--- a/Tools/llm-tool/LLMTool.swift
+++ b/Tools/llm-tool/LLMTool.swift
@@ -227,31 +227,12 @@ struct EvaluateCommand: AsyncParsableCommand {
             ?? modelConfiguration.defaultPrompt
         let images = image.map { UserInput.Image.url($0) }
         let videos = video.map { UserInput.Video.url($0) }
-        let messages: [[String: Any]] =
-            if !images.isEmpty || !videos.isEmpty {
-                [
-                    [
-                        "role": "user",
-                        "content": [
-                            [
-                                "type": "text",
-                                "text": generate.system,
-                            ]
-                        ]
-                            // Messages format for Qwen 2 VL, Qwen 2.5 VL. May need to be adapted for other models.
-                            + images.map { _ in ["type": "image"] }
-                            + videos.map { _ in ["type": "video"] },
-                    ]
-                ]
-            } else {
-                [
-                    [
-                        "role": "user",
-                        "content": prompt,
-                    ]
-                ]
-            }
-        var userInput = UserInput(messages: messages, images: images, videos: videos)
+        var userInput = UserInput(
+            chat: [
+                .system(generate.system),
+                .user(prompt, images: images, videos: videos),
+            ]
+        )
         if !resize.isEmpty {
             let size: CGSize
             if resize.count == 1 {
diff --git a/mlx-swift-examples.xcodeproj/project.pbxproj b/mlx-swift-examples.xcodeproj/project.pbxproj
index daec81d7..f2018c02 100644
--- a/mlx-swift-examples.xcodeproj/project.pbxproj
+++ b/mlx-swift-examples.xcodeproj/project.pbxproj
@@ -23,6 +23,10 @@
 		C3056BBA2BCD981900A31D04 /* train.jsonl in Resources */ = {isa = PBXBuildFile; fileRef = C3056BA22BCD973400A31D04 /* train.jsonl */; };
 		C3056BBB2BCD981900A31D04 /* test.jsonl in Resources */ = {isa = PBXBuildFile; fileRef = C3056BA12BCD973400A31D04 /* test.jsonl */; };
 		C3056BBC2BCD981900A31D04 /* valid.jsonl in Resources */ = {isa = PBXBuildFile; fileRef = C3056BA32BCD973400A31D04 /* valid.jsonl */; };
+		C3208E762DB1945D006AE6CA /* MLX in Frameworks */ = {isa = PBXBuildFile; productRef = C3208E752DB1945D006AE6CA /* MLX */; };
+		C3208E7A2DB1945D006AE6CA /* MLXNN in Frameworks */ = {isa = PBXBuildFile; productRef = C3208E792DB1945D006AE6CA /* MLXNN */; };
+		C3208E7B2DB194ED006AE6CA /* MLXLLM in Frameworks */ = {isa = PBXBuildFile; productRef = C32A17FC2CFFB98A0092A5B6 /* MLXLLM */; };
+		C3208ED02DB195A7006AE6CA /* MLXVLM in Frameworks */ = {isa = PBXBuildFile; productRef = C32A17FE2CFFB98A0092A5B6 /* MLXVLM */; };
 		C3288D762B6D9313009FF608 /* LinearModelTraining.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3288D752B6D9313009FF608 /* LinearModelTraining.swift */; };
 		C3288D7B2B6D9339009FF608 /* ArgumentParser in Frameworks */ = {isa = PBXBuildFile; productRef = C3288D7A2B6D9339009FF608 /* ArgumentParser */; };
 		C32A17FD2CFFB98A0092A5B6 /* MLXLLM in Frameworks */ = {isa = PBXBuildFile; productRef = C32A17FC2CFFB98A0092A5B6 /* MLXLLM */; };
@@ -61,6 +65,7 @@
 		C3A8B3F42B92A2A90002EFB8 /* LLMEvalApp.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3A8B3ED2B92A2A90002EFB8 /* LLMEvalApp.swift */; };
 		C3A8B3F52B92A2A90002EFB8 /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C3A8B3EF2B92A2A90002EFB8 /* Preview Assets.xcassets */; };
 		C3A8B3F72B92A2A90002EFB8 /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = C3A8B3F22B92A2A90002EFB8 /* ContentView.swift */; };
+		C3C7C4FB2DB19026000373CF /* AVKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C3C7C4FA2DB19026000373CF /* AVKit.framework */; };
 		C3E7D94D2CF6C9B20056C095 /* StableDiffusion in Frameworks */ = {isa = PBXBuildFile; productRef = C3E7D94C2CF6C9B20056C095 /* StableDiffusion */; };
 /* End PBXBuildFile section */
@@ -203,6 +208,7 @@
 		C3056BB32BCD97B800A31D04 /* LoRATrainingExample.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = LoRATrainingExample.entitlements; sourceTree = "<group>"; };
 		C3056BB52BCD97B800A31D04 /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = "<group>"; };
 		C3056BC42BCDAB8600A31D04 /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
+		C3208E6E2DB19451006AE6CA /* MLXLMTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = MLXLMTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
 		C325DE3F2B648CDB00628871 /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
 		C3288D732B6D9313009FF608 /* LinearModelTraining */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = LinearModelTraining; sourceTree = BUILT_PRODUCTS_DIR; };
 		C3288D752B6D9313009FF608 /* LinearModelTraining.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = LinearModelTraining.swift; sourceTree = "<group>"; };
@@ -242,12 +248,27 @@
 		C3A8B3F22B92A2A90002EFB8 /* ContentView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = "<group>"; };
 		C3C3240B2B6CA689007D2D9A /* README.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
 		C3C36A6B2CA714600099FFA4 /* Build.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = Build.xcconfig; sourceTree = "<group>"; };
+		C3C7C4D92DB16C83000373CF /* AVFoundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = AVFoundation.framework; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS18.4.sdk/System/Library/Frameworks/AVFoundation.framework; sourceTree = DEVELOPER_DIR; };
+		C3C7C4DB2DB16C89000373CF /* CoreImage.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreImage.framework; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS18.4.sdk/System/Library/Frameworks/CoreImage.framework; sourceTree = DEVELOPER_DIR; };
+		C3C7C4DD2DB16CA2000373CF /* CoreAudioTypes.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreAudioTypes.framework; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS18.4.sdk/System/Library/Frameworks/CoreAudioTypes.framework; sourceTree = DEVELOPER_DIR; };
+		C3C7C4FA2DB19026000373CF /* AVKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = AVKit.framework; path = Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS18.4.sdk/System/Library/Frameworks/AVKit.framework; sourceTree = DEVELOPER_DIR; };
 		C3D573052C40701E00857A35 /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = "<group>"; };
 		F8D7023A2BB4E223003D7CF5 /* Package.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Package.swift; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
+/* Begin PBXFileSystemSynchronizedBuildFileExceptionSet section */
+		C3208F272DB19614006AE6CA /* PBXFileSystemSynchronizedBuildFileExceptionSet */ = {
+			isa = PBXFileSystemSynchronizedBuildFileExceptionSet;
+			membershipExceptions = (
+				MLXLMTests/UserInputTests.swift,
+			);
+			target = C3208E6D2DB19451006AE6CA /* MLXLMTests */;
+		};
+/* End PBXFileSystemSynchronizedBuildFileExceptionSet section */
+
 /* Begin PBXFileSystemSynchronizedRootGroup section */
 		C397D92E2CD440EF00B87EE2 /* Libraries */ = {isa = PBXFileSystemSynchronizedRootGroup; explicitFileTypes = {}; explicitFolders = (); path = Libraries; sourceTree = "<group>"; };
+		C3C7C4222DB16A02000373CF /* Tests */ = {isa = PBXFileSystemSynchronizedRootGroup; exceptions = (C3208F272DB19614006AE6CA /* PBXFileSystemSynchronizedBuildFileExceptionSet */, ); explicitFileTypes = {}; explicitFolders = (); path = Tests; sourceTree = "<group>"; };
 /* End PBXFileSystemSynchronizedRootGroup section */
 
 /* Begin PBXFrameworksBuildPhase section */
@@ -255,6 +276,7 @@
 			isa = PBXFrameworksBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				C3C7C4FB2DB19026000373CF /* AVKit.framework in Frameworks */,
 				0A8284222D13863900BEF338 /* MLXVLM in Frameworks */,
 				C32B4C6D2DA7136000EF663D /* AsyncAlgorithms in Frameworks */,
 			);
@@ -268,6 +290,17 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
+		C3208E6B2DB19451006AE6CA /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				C3208E7B2DB194ED006AE6CA /* MLXLLM in Frameworks */,
+				C3208ED02DB195A7006AE6CA /* MLXVLM in Frameworks */,
+				C3208E7A2DB1945D006AE6CA /* MLXNN in Frameworks */,
+				C3208E762DB1945D006AE6CA /* MLX in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
 		C3288D702B6D9313009FF608 /* Frameworks */ = {
 			isa = PBXFrameworksBuildPhase;
 			buildActionMask = 2147483647;
@@ -481,6 +514,7 @@
 				C3C36A6C2CA714600099FFA4 /* Configuration */,
 				C3056BA62BCD973400A31D04 /* Data */,
 				C397D92E2CD440EF00B87EE2 /* Libraries */,
+				C3C7C4222DB16A02000373CF /* Tests */,
 				C3A8B3AD2B9294E30002EFB8 /* Applications */,
 				C39273812B606A7400368D5D /* Tools */,
 				C39273752B606A0A00368D5D /* Products */,
@@ -501,6 +535,7 @@
 				C36BEFE02BC32988002D4AFE /* image-tool */,
 				C36BF0012BC5CE55002D4AFE /* StableDiffusionExample.app */,
 				0AC74EBB2D136221003C90A7 /* VLMEval.app */,
+				C3208E6E2DB19451006AE6CA /* MLXLMTests.xctest */,
 			);
 			name = Products;
 			sourceTree = "<group>";
@@ -516,6 +551,10 @@
 		C392737E2B606A2C00368D5D /* Frameworks */ = {
 			isa = PBXGroup;
 			children = (
+				C3C7C4FA2DB19026000373CF /* AVKit.framework */,
+				C3C7C4DD2DB16CA2000373CF /* CoreAudioTypes.framework */,
+				C3C7C4DB2DB16C89000373CF /* CoreImage.framework */,
+				C3C7C4D92DB16C83000373CF /* AVFoundation.framework */,
 			);
 			name = Frameworks;
 			sourceTree = "<group>";
@@ -642,6 +681,27 @@
 			productReference = C3056BAB2BCD97B700A31D04 /* LoRATrainingExample.app */;
 			productType = "com.apple.product-type.application";
 		};
+		C3208E6D2DB19451006AE6CA /* MLXLMTests */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = C3208E722DB19451006AE6CA /* Build configuration list for PBXNativeTarget "MLXLMTests" */;
+			buildPhases = (
+				C3208E6A2DB19451006AE6CA /* Sources */,
+				C3208E6B2DB19451006AE6CA /* Frameworks */,
+				C3208E6C2DB19451006AE6CA /* Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = MLXLMTests;
+			packageProductDependencies = (
+				C3208E752DB1945D006AE6CA /* MLX */,
+				C3208E792DB1945D006AE6CA /* MLXNN */,
+			);
+			productName = MLXLMTests;
+			productReference = C3208E6E2DB19451006AE6CA /* MLXLMTests.xctest */;
+			productType = "com.apple.product-type.bundle.unit-test";
+		};
 		C3288D722B6D9313009FF608 /* LinearModelTraining */ = {
 			isa = PBXNativeTarget;
 			buildConfigurationList = C3288D792B6D9313009FF608 /* Build configuration list for PBXNativeTarget "LinearModelTraining" */;
@@ -886,6 +946,7 @@
 				C36BEFDF2BC32988002D4AFE /* image-tool */,
 				C36BF0002BC5CE55002D4AFE /* StableDiffusionExample */,
 				0AC74EBA2D136221003C90A7 /* VLMEval */,
+				C3208E6D2DB19451006AE6CA /* MLXLMTests */,
 			);
 		};
 /* End PBXProject section */
@@ -912,6 +973,13 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
+		C3208E6C2DB19451006AE6CA /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
 		C36BEFFF2BC5CE55002D4AFE /* Resources */ = {
 			isa = PBXResourcesBuildPhase;
 			buildActionMask = 2147483647;
@@ -961,6 +1029,13 @@
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
+		C3208E6A2DB19451006AE6CA /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
 		C3288D6F2B6D9313009FF608 /* Sources */ = {
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
@@ -1344,6 +1419,96 @@
 		};
 		name = Release;
 	};
+		C3208E732DB19451006AE6CA /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CODE_SIGN_STYLE = Automatic;
+				COPY_PHASE_STRIP = NO;
+				CURRENT_PROJECT_VERSION = 1;
+				DEBUG_INFORMATION_FORMAT = dwarf;
+				ENABLE_USER_SCRIPT_SANDBOXING = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu17;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GENERATE_INFOPLIST_FILE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 17.6;
+				LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
+				MACOSX_DEPLOYMENT_TARGET = 14.6;
+				MARKETING_VERSION = 1.0;
+				MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
+				MTL_FAST_MATH = YES;
+				PRODUCT_BUNDLE_IDENTIFIER = mlx.MLXLMTests;
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SDKROOT = auto;
+				SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx xros xrsimulator";
+				SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG $(inherited)";
+				SWIFT_EMIT_LOC_STRINGS = NO;
+				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+				SWIFT_VERSION = 5.0;
+				TARGETED_DEVICE_FAMILY = "1,2,7";
+				XROS_DEPLOYMENT_TARGET = 2.0;
+			};
+			name = Debug;
+		};
+		C3208E742DB19451006AE6CA /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++20";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CODE_SIGN_STYLE = Automatic;
+				COPY_PHASE_STRIP = NO;
+				CURRENT_PROJECT_VERSION = 1;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				ENABLE_NS_ASSERTIONS = NO;
+				ENABLE_USER_SCRIPT_SANDBOXING = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu17;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GENERATE_INFOPLIST_FILE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 17.6;
+				LOCALIZATION_PREFERS_STRING_CATALOGS = YES;
+				MACOSX_DEPLOYMENT_TARGET = 14.6;
+				MARKETING_VERSION = 1.0;
+				MTL_ENABLE_DEBUG_INFO = NO;
+				MTL_FAST_MATH = YES;
+				PRODUCT_BUNDLE_IDENTIFIER = mlx.MLXLMTests;
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SDKROOT = auto;
+				SUPPORTED_PLATFORMS = "iphoneos iphonesimulator macosx xros xrsimulator";
+				SWIFT_COMPILATION_MODE = wholemodule;
+				SWIFT_EMIT_LOC_STRINGS = NO;
+				SWIFT_VERSION = 5.0;
+				TARGETED_DEVICE_FAMILY = "1,2,7";
+				XROS_DEPLOYMENT_TARGET = 2.0;
+			};
+			name = Release;
+		};
 		C3288D772B6D9313009FF608 /* Debug */ = {
 			isa = XCBuildConfiguration;
 			buildSettings = {
@@ -2614,6 +2779,15 @@
 			defaultConfigurationIsVisible = 0;
 			defaultConfigurationName = Release;
 		};
+		C3208E722DB19451006AE6CA /* Build configuration list for PBXNativeTarget "MLXLMTests" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				C3208E732DB19451006AE6CA /* Debug */,
+				C3208E742DB19451006AE6CA /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
 		C3288D792B6D9313009FF608 /* Build configuration list for PBXNativeTarget "LinearModelTraining" */ = {
 			isa = XCConfigurationList;
 			buildConfigurations = (
@@ -2766,6 +2940,16 @@
 			package = 81695B3F2BA373D300F260D8 /* XCRemoteSwiftPackageReference "swift-markdown-ui" */;
 			productName = MarkdownUI;
 		};
+		C3208E752DB1945D006AE6CA /* MLX */ = {
+			isa = XCSwiftPackageProductDependency;
+			package = C32A18442D00E13E0092A5B6 /* XCRemoteSwiftPackageReference "mlx-swift" */;
+			productName = MLX;
+		};
+		C3208E792DB1945D006AE6CA /* MLXNN */ = {
+			isa = XCSwiftPackageProductDependency;
+			package = C32A18442D00E13E0092A5B6 /* XCRemoteSwiftPackageReference "mlx-swift" */;
+			productName = MLXNN;
+		};
 		C3288D7A2B6D9339009FF608 /* ArgumentParser */ = {
 			isa = XCSwiftPackageProductDependency;
 			package = C392736E2B60699100368D5D /* XCRemoteSwiftPackageReference "swift-argument-parser" */;
diff --git a/mlx-swift-examples.xcodeproj/xcshareddata/xcschemes/mlx-libraries-Package.xcscheme b/mlx-swift-examples.xcodeproj/xcshareddata/xcschemes/mlx-libraries-Package.xcscheme
new file mode 100644
index 00000000..8e69394e
--- /dev/null
+++ b/mlx-swift-examples.xcodeproj/xcshareddata/xcschemes/mlx-libraries-Package.xcscheme
@@ -0,0 +1,154 @@
[154 lines of Xcode scheme XML for the shared "mlx-libraries-Package" scheme (build and test actions, including the MLXLMTests target); the XML markup was stripped during extraction and is not reproduced here]
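Usage sketch (appended after the patch, not part of it): the snippet below shows how the pieces added above fit together — `Chat.Message`, `UserInput(chat:)`, and the `MessageGenerator` implementations are all defined in this diff, while the image URL is a placeholder.

```swift
import Foundation
import MLXLMCommon
import MLXVLM

// Build a model-agnostic conversation; media is attached per message.
let chat: [Chat.Message] = [
    .system("You are a helpful photographic assistant."),
    .user(
        "Please describe the photo.",
        images: [.url(URL(string: "https://example.com/photo.png")!)]),  // placeholder URL
]

// UserInput(chat:) collects the images/videos out of the messages.
let input = UserInput(chat: chat)
assert(input.images.count == 1)

// A MessageGenerator converts the structured chat into the raw
// [String: Any] message format a particular model's chat template expects.
let plain = DefaultMessageGenerator().generate(from: input)
// [["role": "system", "content": "..."], ["role": "user", "content": "..."]]
print(plain)

let qwen = Qwen2VLMessageGenerator().generate(from: input)
// the user message "content" becomes [["type": "text", ...], ["type": "image"]]
print(qwen)
```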
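Because the `MessageGenerator` protocol in this diff has a single requirement, supporting a model whose chat template wants a different dictionary shape is one method. A hypothetical sketch — the "from"/"value" key names are illustrative and do not correspond to any model in this patch:

```swift
import MLXLMCommon

/// Hypothetical generator for a chat template that expects
/// "from"/"value" keys instead of "role"/"content".
struct FromValueMessageGenerator: MessageGenerator {
    func generate(message: Chat.Message) -> Message {
        [
            "from": message.role.rawValue,
            "value": message.content,
        ]
    }
}
```

The `generate(messages:)` and `generate(from:)` conveniences come from the protocol extension in `Chat.swift`, so a conforming type only describes the per-message shape.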