
Commit 4add690

Merge branch 'main' into qwen-2.5-vl
2 parents 19e2aa8 + c8164a2 commit 4add690

14 files changed, +221 -248 lines changed

Libraries/MLXLLM/LLMModelFactory.swift

Lines changed: 11 additions & 79 deletions

@@ -20,20 +20,10 @@ private func create<C: Codable, M>(
 /// Registry of model type, e.g 'llama', to functions that can instantiate the model from configuration.
 ///
 /// Typically called via ``LLMModelFactory/load(hub:configuration:progressHandler:)``.
-public class ModelTypeRegistry: @unchecked Sendable {
-
-    /// Creates an empty registry.
-    public init() {
-        self.creators = [:]
-    }
-
-    /// Creates a registry with given creators.
-    public init(creators: [String: @Sendable (URL) throws -> any LanguageModel]) {
-        self.creators = creators
-    }
+public class LLMTypeRegistry: ModelTypeRegistry, @unchecked Sendable {
 
     /// Shared instance with default model types.
-    public static let shared: ModelTypeRegistry = .init(creators: all())
+    public static let shared: LLMTypeRegistry = .init(creators: all())
 
     /// All predefined model types.
     private static func all() -> [String: @Sendable (URL) throws -> any LanguageModel] {
@@ -53,32 +43,6 @@ public class ModelTypeRegistry: @unchecked Sendable {
         ]
     }
 
-    // Note: using NSLock as we have very small (just dictionary get/set)
-    // critical sections and expect no contention. this allows the methods
-    // to remain synchronous.
-    private let lock = NSLock()
-    private var creators: [String: @Sendable (URL) throws -> any LanguageModel]
-
-    /// Add a new model to the type registry.
-    public func registerModelType(
-        _ type: String, creator: @Sendable @escaping (URL) throws -> any LanguageModel
-    ) {
-        lock.withLock {
-            creators[type] = creator
-        }
-    }
-
-    /// Given a `modelType` and configuration file instantiate a new `LanguageModel`.
-    public func createModel(configuration: URL, modelType: String) throws -> LanguageModel {
-        let creator = lock.withLock {
-            creators[modelType]
-        }
-        guard let creator else {
-            throw ModelFactoryError.unsupportedModelType(modelType)
-        }
-        return try creator(configuration)
-    }
-
 }
 
 /// Registry of models and any overrides that go with them, e.g. prompt augmentation.
@@ -87,23 +51,10 @@ public class ModelTypeRegistry: @unchecked Sendable {
 /// The python tokenizers have a very rich set of implementations and configuration. The
 /// swift-tokenizers code handles a good chunk of that and this is a place to augment that
 /// implementation, if needed.
-public class ModelRegistry: @unchecked Sendable {
-
-    /// Creates an empty registry.
-    public init() {
-        self.registry = Dictionary()
-    }
-
-    /// Creates a new registry with from given model configurations.
-    public init(modelConfigurations: [ModelConfiguration]) {
-        self.registry = Dictionary(uniqueKeysWithValues: modelConfigurations.map { ($0.name, $0) })
-    }
+public class LLMRegistry: AbstractModelRegistry, @unchecked Sendable {
 
     /// Shared instance with default model configurations.
-    public static let shared = ModelRegistry(modelConfigurations: all())
-
-    private let lock = NSLock()
-    private var registry: [String: ModelConfiguration]
+    public static let shared = LLMRegistry(modelConfigurations: all())
 
     static public let smolLM_135M_4bit = ModelConfiguration(
         id: "mlx-community/SmolLM-135M-Instruct-4bit",
@@ -239,31 +190,11 @@ public class ModelRegistry: @unchecked Sendable {
         ]
     }
 
-    public func register(configurations: [ModelConfiguration]) {
-        lock.withLock {
-            for c in configurations {
-                registry[c.name] = c
-            }
-        }
-    }
-
-    public func configuration(id: String) -> ModelConfiguration {
-        lock.withLock {
-            if let c = registry[id] {
-                return c
-            } else {
-                return ModelConfiguration(id: id)
-            }
-        }
-    }
-
-    public var models: some Collection<ModelConfiguration> & Sendable {
-        lock.withLock {
-            return registry.values
-        }
-    }
 }
 
+@available(*, deprecated, renamed: "LLMRegistry", message: "Please use LLMRegistry directly.")
+public typealias ModelRegistry = LLMRegistry
+
 private struct LLMUserInputProcessor: UserInputProcessor {
 
     let tokenizer: Tokenizer
@@ -304,19 +235,20 @@ private struct LLMUserInputProcessor: UserInputProcessor {
 /// ```
 public class LLMModelFactory: ModelFactory {
 
-    public init(typeRegistry: ModelTypeRegistry, modelRegistry: ModelRegistry) {
+    public init(typeRegistry: ModelTypeRegistry, modelRegistry: AbstractModelRegistry) {
         self.typeRegistry = typeRegistry
         self.modelRegistry = modelRegistry
     }
 
     /// Shared instance with default behavior.
-    public static let shared = LLMModelFactory(typeRegistry: .shared, modelRegistry: .shared)
+    public static let shared = LLMModelFactory(
+        typeRegistry: LLMTypeRegistry.shared, modelRegistry: LLMRegistry.shared)
 
     /// registry of model type, e.g. configuration value `llama` -> configuration and init methods
    public let typeRegistry: ModelTypeRegistry
 
     /// registry of model id to configuration, e.g. `mlx-community/Llama-3.2-3B-Instruct-4bit`
-    public let modelRegistry: ModelRegistry
+    public let modelRegistry: AbstractModelRegistry
 
     public func configuration(id: String) -> ModelConfiguration {
         modelRegistry.configuration(id: id)
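
This refactor splits the old registries in two: reusable base classes (`ModelTypeRegistry`, `AbstractModelRegistry`, added below) and thin LLM-specific subclasses (`LLMTypeRegistry`, `LLMRegistry`) that only supply the default creators and configurations. A minimal usage sketch, based only on the API visible in this diff (the custom model id is a hypothetical placeholder):

```swift
import MLXLLM
import MLXLMCommon

// Register an extra configuration on the shared LLM registry
// (register(configurations:) is inherited from AbstractModelRegistry).
LLMRegistry.shared.register(configurations: [
    ModelConfiguration(id: "my-org/My-Model-4bit")  // hypothetical id
])

// The shared factory now wires the two registries together explicitly.
let factory = LLMModelFactory(
    typeRegistry: LLMTypeRegistry.shared,
    modelRegistry: LLMRegistry.shared
)

// Code that still references `ModelRegistry` keeps compiling through the
// deprecated typealias, but with a deprecation warning.
let config = factory.configuration(id: "mlx-community/SmolLM-135M-Instruct-4bit")
```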

Libraries/MLXLLM/Models/Cohere.swift

Lines changed: 1 addition & 1 deletion

@@ -163,7 +163,7 @@ public class CohereModel: Module, LLMModel, KVCacheDimensionProvider {
 
     public func callAsFunction(_ inputs: MLXArray, cache: [KVCache]?) -> MLXArray {
         var out = model(inputs, cache: cache)
-        out = matmul(out, model.embedTokens.weight.T)
+        out = model.embedTokens.asLinear(out)
         out = out * self.logitScale
         return out
     }
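
The only change here swaps the hand-written projection against the transposed embedding weight for `Embedding.asLinear`, which applies the same tied weight as a linear layer. A conceptual sketch of the equivalence, not code from this commit (it assumes the MLXNN `Embedding(embeddingCount:dimensions:)` initializer; shapes are arbitrary):

```swift
import MLX
import MLXNN

let vocab = 32
let dims = 8
let embed = Embedding(embeddingCount: vocab, dimensions: dims)
let hidden = MLXArray.zeros([1, 4, dims])  // [batch, tokens, dims]

// Before: project hidden states onto the vocabulary by hand.
let logitsManual = matmul(hidden, embed.weight.T)

// After: let the embedding layer apply its own weight as the output head.
let logitsTied = embed.asLinear(hidden)
// Both yield [1, 4, vocab]-shaped logits from the shared weight.
```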

Libraries/MLXLLM/Models/OpenELM.swift

Lines changed: 13 additions & 15 deletions

@@ -27,7 +27,6 @@ func makeDivisible(_ v: Float, divisor: Int = 8, minValue: Float? = nil) -> Int
 }
 
 private class MultiHeadCausalAttention: Module {
-    var args: OpenElmConfiguration
     let scale: Float
     let heads: Int
     let headDim: Int
@@ -36,18 +35,17 @@ private class MultiHeadCausalAttention: Module {
     @ModuleInfo(key: "qkv_proj") var qkvProj: Linear
     @ModuleInfo(key: "out_proj") var outProj: Linear
 
-    @ModuleInfo(key: "q_norm") var qNorm: RMSNorm
-    @ModuleInfo(key: "k_norm") var kNorm: RMSNorm
+    @ModuleInfo(key: "q_norm") var qNorm: RMSNorm?
+    @ModuleInfo(key: "k_norm") var kNorm: RMSNorm?
 
     let rope: RoPE
 
     public init(_ args: OpenElmConfiguration, layerId: Int) {
-        self.args = args
         self.headDim = args.headDimensions
         let modelDim = args.modelDim
 
-        self.heads = self.args.numQueryHeads[layerId]
-        self.kvHeads = self.args.kvHeads[layerId]
+        self.heads = args.numQueryHeads[layerId]
+        self.kvHeads = args.kvHeads[layerId]
         self.scale = pow(Float(headDim), -0.5)
 
         let opSize = (heads + (kvHeads * 2)) * headDim
@@ -74,7 +72,7 @@ private class MultiHeadCausalAttention: Module {
         var keys = qkvSplit[1]
         var values = qkvSplit[2]
 
-        if args.normalizeQkProjections {
+        if let qNorm, let kNorm {
             queries = qNorm(queries)
             keys = kNorm(keys)
         }
@@ -181,27 +179,27 @@ public class OpenELMModel: Module, LLMModel, KVCacheDimensionProvider {
     public let vocabularySize: Int
     public let kvHeads: [Int]
 
-    let shareInputOutputLayers: Bool
     let transformer: OpenELMModelInner
 
-    @ModuleInfo(key: "lm_head") var lmHead: Linear
+    @ModuleInfo(key: "lm_head") var lmHead: Linear?
 
     public init(_ args: OpenElmConfiguration) {
         self.vocabularySize = args.vocabularySize
         self.kvHeads = args.kvHeads
 
         self.transformer = OpenELMModelInner(args)
-        self.shareInputOutputLayers = args.shareInputOutputLayers
-        self._lmHead.wrappedValue = Linear(
-            args.numTransformerLayers, args.vocabularySize, bias: false)
+        if !args.shareInputOutputLayers {
+            self._lmHead.wrappedValue = Linear(
+                args.numTransformerLayers, args.vocabularySize, bias: false)
+        }
    }
 
     public func callAsFunction(_ inputs: MLXArray, cache: [KVCache]?) -> MLXArray {
         var out = transformer(inputs, cache: cache)
-        if shareInputOutputLayers {
-            out = matmul(out, transformer.embedTokens.weight.T)
-        } else {
+        if let lmHead {
             out = lmHead(out)
+        } else {
+            out = transformer.embedTokens.asLinear(out)
         }
 
         return out
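
Besides the `asLinear` switch, this diff drops the stored `shareInputOutputLayers` flag and makes `lmHead` and the q/k norms optional, branching on their presence instead of on configuration flags. An illustrative sketch of that pattern (a hypothetical `TinyLM`, not code from the repo):

```swift
import MLX
import MLXNN

class TinyLM: Module {
    let embed: Embedding
    @ModuleInfo(key: "lm_head") var lmHead: Linear?

    init(vocab: Int, dims: Int, tieWeights: Bool) {
        self.embed = Embedding(embeddingCount: vocab, dimensions: dims)
        // Only create the output head when the weights are untied.
        if !tieWeights {
            self._lmHead.wrappedValue = Linear(dims, vocab, bias: false)
        }
    }

    func callAsFunction(_ hidden: MLXArray) -> MLXArray {
        if let lmHead {
            return lmHead(hidden)  // untied: dedicated projection
        } else {
            return embed.asLinear(hidden)  // tied: reuse the embedding weight
        }
    }
}
```

Branching on the optional also keeps an unused `lm_head` weight out of the module tree entirely.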

Libraries/MLXLLM/Models/Starcoder2.swift

Lines changed: 1 addition & 1 deletion

@@ -173,7 +173,7 @@ public class Starcoder2Model: Module, LLMModel, KVCacheDimensionProvider {
         if !tieWordEmbeddings {
             return lmHead(out)
         } else {
-            out = matmul(out, model.embedTokens.weight.T)
+            out = model.embedTokens.asLinear(out)
             return out
         }
     }

Libraries/MLXLMCommon/ModelConfiguration.swift

Lines changed: 2 additions & 2 deletions

@@ -31,10 +31,10 @@ public struct ModelConfiguration: Sendable {
     public let overrideTokenizer: String?
 
     /// A reasonable default prompt for the model
-    public let defaultPrompt: String
+    public var defaultPrompt: String
 
     /// Additional tokens to use for end of string
-    public let extraEOSTokens: Set<String>
+    public var extraEOSTokens: Set<String>
 
     public init(
         id: String, tokenizerId: String? = nil, overrideTokenizer: String? = nil,
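
Switching `defaultPrompt` and `extraEOSTokens` from `let` to `var` lets callers adjust a configuration after creating it, which the new `ModelContainer.update` below builds on. A minimal sketch; the prompt and token values are placeholders, not library defaults:

```swift
import MLXLMCommon

var config = ModelConfiguration(id: "mlx-community/SmolLM-135M-Instruct-4bit")
config.defaultPrompt = "Summarize the following text:"  // now mutable
config.extraEOSTokens.insert("<|endoftext|>")           // now mutable
```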

Libraries/MLXLMCommon/ModelContainer.swift

Lines changed: 7 additions & 3 deletions

@@ -32,12 +32,11 @@ import Tokenizers
 /// }
 /// ```
 public actor ModelContainer {
-    let context: ModelContext
-    nonisolated public let configuration: ModelConfiguration
+    var context: ModelContext
+    public var configuration: ModelConfiguration { context.configuration }
 
     public init(context: ModelContext) {
         self.context = context
-        self.configuration = context.configuration
     }
 
     /// Perform an action on the model and/or tokenizer. Callers _must_ eval any `MLXArray` before returning as
@@ -75,4 +74,9 @@ public actor ModelContainer {
         try await action(context, values)
     }
 
+    /// Update the owned `ModelContext`.
+    /// - Parameter action: update action
+    public func update(_ action: @Sendable (inout ModelContext) -> Void) {
+        action(&context)
+    }
 }
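
`ModelContainer` now owns a mutable `ModelContext` and derives `configuration` from it; the new `update(_:)` method is how callers mutate the context inside the actor (together with the `var` fields on `ModelContext` in the next file). A usage sketch; the container is assumed to come from an earlier `loadContainer` call and the prompt string is a placeholder:

```swift
import MLXLMCommon

func customize(_ container: ModelContainer) async {
    // Mutate the owned ModelContext inside the actor.
    await container.update { context in
        context.configuration.defaultPrompt = "You are a helpful assistant."
    }

    // `configuration` is now a computed property over the current context.
    let prompt = await container.configuration.defaultPrompt
    print(prompt)
}
```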

Libraries/MLXLMCommon/ModelFactory.swift

Lines changed: 4 additions & 4 deletions

@@ -22,10 +22,10 @@ public enum ModelFactoryError: Error {
 /// See also ``ModelFactory/loadContainer(hub:configuration:progressHandler:)`` and
 /// ``ModelContainer``.
 public struct ModelContext {
-    public let configuration: ModelConfiguration
-    public let model: any LanguageModel
-    public let processor: any UserInputProcessor
-    public let tokenizer: Tokenizer
+    public var configuration: ModelConfiguration
+    public var model: any LanguageModel
+    public var processor: any UserInputProcessor
+    public var tokenizer: Tokenizer
 
     public init(
         configuration: ModelConfiguration, model: any LanguageModel,
Lines changed: 43 additions & 0 deletions

@@ -0,0 +1,43 @@
+// Copyright © 2024 Apple Inc.
+
+import Foundation
+
+open class AbstractModelRegistry: @unchecked Sendable {
+
+    /// Creates an empty registry.
+    public init() {
+        self.registry = Dictionary()
+    }
+
+    /// Creates a new registry with from given model configurations.
+    public init(modelConfigurations: [ModelConfiguration]) {
+        self.registry = Dictionary(uniqueKeysWithValues: modelConfigurations.map { ($0.name, $0) })
+    }
+
+    private let lock = NSLock()
+    private var registry: [String: ModelConfiguration]
+
+    public func register(configurations: [ModelConfiguration]) {
+        lock.withLock {
+            for c in configurations {
+                registry[c.name] = c
+            }
+        }
+    }
+
+    public func configuration(id: String) -> ModelConfiguration {
+        lock.withLock {
+            if let c = registry[id] {
+                return c
+            } else {
+                return ModelConfiguration(id: id)
+            }
+        }
+    }
+
+    public var models: some Collection<ModelConfiguration> & Sendable {
+        lock.withLock {
+            return registry.values
+        }
+    }
+}
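
This new base class (a new file; its path does not appear in the headers above) factors the thread-safe id-to-configuration lookup out of the old `ModelRegistry`, leaving `LLMRegistry` to supply only the default configuration list. Because it is `open`, other registries can subclass it; a hypothetical example, assuming the class ships in MLXLMCommon (not part of this commit):

```swift
import Foundation
import MLXLMCommon

final class MyProjectRegistry: AbstractModelRegistry, @unchecked Sendable {
    static let shared = MyProjectRegistry(modelConfigurations: [
        ModelConfiguration(id: "my-org/internal-model-4bit")  // placeholder id
    ])
}

// Unknown ids fall back to a plain ModelConfiguration(id:) rather than failing.
let known = MyProjectRegistry.shared.configuration(id: "my-org/internal-model-4bit")
let fallback = MyProjectRegistry.shared.configuration(id: "some-org/other-model")
```
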
Lines changed: 43 additions & 0 deletions

@@ -0,0 +1,43 @@
+// Copyright © 2024 Apple Inc.
+
+import Foundation
+
+open class ModelTypeRegistry: @unchecked Sendable {
+
+    /// Creates an empty registry.
+    public init() {
+        self.creators = [:]
+    }
+
+    /// Creates a registry with given creators.
+    public init(creators: [String: @Sendable (URL) throws -> any LanguageModel]) {
+        self.creators = creators
+    }
+
+    // Note: using NSLock as we have very small (just dictionary get/set)
+    // critical sections and expect no contention. this allows the methods
+    // to remain synchronous.
+    private let lock = NSLock()
+    private var creators: [String: @Sendable (URL) throws -> any LanguageModel]
+
+    /// Add a new model to the type registry.
+    public func registerModelType(
+        _ type: String, creator: @Sendable @escaping (URL) throws -> any LanguageModel
+    ) {
+        lock.withLock {
+            creators[type] = creator
+        }
+    }
+
+    /// Given a `modelType` and configuration file instantiate a new `LanguageModel`.
+    public func createModel(configuration: URL, modelType: String) throws -> LanguageModel {
+        let creator = lock.withLock {
+            creators[modelType]
+        }
+        guard let creator else {
+            throw ModelFactoryError.unsupportedModelType(modelType)
+        }
+        return try creator(configuration)
+    }
+
+}
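
The `ModelTypeRegistry` implementation moves into this new file essentially unchanged, but as an `open` class so factories other than the LLM one can build on it. A sketch of registering a custom model type on the shared LLM subclass; `MyModel` and `MyModelConfiguration` are hypothetical types (assumed to conform to `LanguageModel` and `Codable`), not part of the library:

```swift
import Foundation
import MLXLLM
import MLXLMCommon

func registerMyModel() {
    // The creator receives the URL of the model's config.json.
    LLMTypeRegistry.shared.registerModelType("my_model") { url in
        let data = try Data(contentsOf: url)
        let config = try JSONDecoder().decode(MyModelConfiguration.self, from: data)
        return MyModel(config)  // hypothetical LanguageModel implementation
    }
}
```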
