Codable macro #316

Open · davidkoski wants to merge 7 commits into main
8 changes: 4 additions & 4 deletions .circleci/config.yml
@@ -38,17 +38,17 @@ jobs:
xcrun --show-sdk-build-version
swift --version
find . -name Package.resolved -exec rm {} \;
xcodebuild test -scheme mlx-libraries-Package -destination 'platform=OS X'
xcodebuild test -scheme mlx-libraries-Package -destination 'platform=OS X' -skipMacroValidation
davidkoski (Collaborator, Author):

This is how we tell CI to trust the macro package -- it runs in a sandbox but still requires some permission.

- run:
name: Build Examples
command: |
xcodebuild -version
xcrun --show-sdk-build-version
swift --version
find . -name Package.resolved -exec rm {} \;
xcodebuild -scheme llm-tool
xcodebuild -scheme image-tool
xcodebuild -scheme mnist-tool
xcodebuild -scheme llm-tool -skipMacroValidation
xcodebuild -scheme image-tool -skipMacroValidation
xcodebuild -scheme mnist-tool -skipMacroValidation

workflows:
build_and_test:
2 changes: 1 addition & 1 deletion Libraries/Embedders/Pooling.swift
@@ -5,7 +5,7 @@ import MLX
import MLXLinalg
import MLXNN

public struct PoolingConfiguration: Codable {
public struct PoolingConfiguration: Codable, Sendable {
davidkoski (Collaborator, Author):

All of these should be Sendable as well
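As a rough sketch of why the Sendable conformance matters (the ExampleConfiguration type and loadConfiguration function below are hypothetical stand-ins, not part of this PR): a Sendable configuration can be decoded off the main actor and handed back across an isolation boundary without strict-concurrency warnings.

```swift
import Foundation

// Hypothetical configuration mirroring the pattern in this PR.
struct ExampleConfiguration: Codable, Sendable {
    let dimension: Int
}

// Decoding runs in a detached task; because ExampleConfiguration is Sendable,
// the decoded value can be returned across the isolation boundary.
func loadConfiguration(from url: URL) async throws -> ExampleConfiguration {
    let data = try Data(contentsOf: url)
    return try await Task.detached {
        try JSONDecoder().decode(ExampleConfiguration.self, from: data)
    }.value
}
```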

public let dimension: Int
public let poolingModeClsToken: Bool
public let poolingModeMeanTokens: Bool
2 changes: 1 addition & 1 deletion Libraries/MLXLLM/Lora+Data.swift
@@ -48,7 +48,7 @@ public func loadLoRAData(url: URL) throws -> [String] {

func loadJSONL(url: URL) throws -> [String] {

struct Line: Codable {
struct Line: Codable, Sendable {
let text: String?
}

73 changes: 16 additions & 57 deletions Libraries/MLXLLM/Models/Cohere.swift
@@ -2,8 +2,9 @@ import Foundation
import MLX
import MLXLMCommon
import MLXNN
import ReerCodable

// port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/cohere.py
// port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/cohere.py
davidkoski (Collaborator, Author):

Updated the URLs -- these moved to the new mlx-lm repo.


private class Attention: Module {

@@ -168,63 +169,21 @@ public class CohereModel: Module, LLMModel, KVCacheDimensionProvider {
}
}

public struct CohereConfiguration: Codable, Sendable {

var hiddenSize: Int
var hiddenLayers: Int
var intermediateSize: Int
var attentionHeads: Int
var layerNormEps: Float
var vocabularySize: Int
var kvHeads: Int
var ropeTheta: Float = 8000000.0
var ropeTraditional: Bool = true
var ropeScaling: [String: StringOrNumber]? = nil
var logitScale: Float

enum CodingKeys: String, CodingKey {
case hiddenSize = "hidden_size"
case hiddenLayers = "num_hidden_layers"
case intermediateSize = "intermediate_size"
case attentionHeads = "num_attention_heads"
case kvHeads = "num_key_value_heads"
case ropeTheta = "rope_theta"
case vocabularySize = "vocab_size"
case layerNormEps = "layer_norm_eps"
case logitScale = "logit_scale"
case ropeTraditional = "rope_traditional"
case ropeScaling = "rope_scaling"
}
@Codable
public struct CohereConfiguration: Sendable {

@CodingKey("hidden_size") public var hiddenSize: Int = 8192
@CodingKey("num_hidden_layers") public var hiddenLayers: Int = 40
@CodingKey("intermediate_size") public var intermediateSize: Int = 22528
@CodingKey("num_attention_heads") public var attentionHeads: Int = 64
@CodingKey("num_key_value_heads") public var layerNormEps: Float = 1e-5
@CodingKey("rope_theta") public var vocabularySize: Int = 256000
@CodingKey("vocab_size") public var kvHeads: Int = 64
@CodingKey("layer_norm_eps") public var ropeTheta: Float = 8000000.0
@CodingKey("logit_scale") public var ropeTraditional: Bool = true
@CodingKey("rope_traditional") public var ropeScaling: [String: StringOrNumber]? = nil
@CodingKey("rope_scaling") public var logitScale: Float = 0.0625
davidkoski (Collaborator, Author) commented on May 14, 2025:

This is the typical change: use @CodingKey and the defaults just work
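A minimal sketch of what the macro gives us, assuming (as this PR relies on) that ReerCodable's @Codable generates ordinary Codable conformance and falls back to the declared default when a key is missing; the ExampleConfiguration type and JSON here are hypothetical:

```swift
import Foundation
import ReerCodable

// Hypothetical configuration used only for illustration.
@Codable
struct ExampleConfiguration: Sendable {
    @CodingKey("hidden_size") var hiddenSize: Int = 8192
    @CodingKey("rope_theta") var ropeTheta: Float = 8_000_000.0
}

// "rope_theta" is absent from the JSON, so the declared default is used.
let json = Data(#"{ "hidden_size": 4096 }"#.utf8)
let config = try JSONDecoder().decode(ExampleConfiguration.self, from: json)
assert(config.hiddenSize == 4096)        // value from the JSON
assert(config.ropeTheta == 8_000_000)    // declared default

```

Compare with the hand-written init(from:) below, which this change removes.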


public init(from decoder: Decoder) throws {
// custom implementation to handle optional keys with required values
let container: KeyedDecodingContainer<CohereConfiguration.CodingKeys> =
try decoder.container(
keyedBy: CohereConfiguration.CodingKeys.self)

self.hiddenSize = try container.decode(
Int.self, forKey: CohereConfiguration.CodingKeys.hiddenSize)
self.hiddenLayers = try container.decode(
Int.self, forKey: CohereConfiguration.CodingKeys.hiddenLayers)
self.intermediateSize = try container.decode(
Int.self, forKey: CohereConfiguration.CodingKeys.intermediateSize)
self.attentionHeads = try container.decode(
Int.self, forKey: CohereConfiguration.CodingKeys.attentionHeads)
self.layerNormEps = try container.decode(
Float.self, forKey: CohereConfiguration.CodingKeys.layerNormEps)
self.vocabularySize = try container.decode(
Int.self, forKey: CohereConfiguration.CodingKeys.vocabularySize)
self.kvHeads = try container.decode(
Int.self, forKey: CohereConfiguration.CodingKeys.kvHeads)
self.ropeTheta =
try container.decodeIfPresent(
Float.self, forKey: CohereConfiguration.CodingKeys.ropeTheta)
?? 8000000.0
self.ropeScaling = try container.decodeIfPresent(
[String: StringOrNumber].self, forKey: CohereConfiguration.CodingKeys.ropeScaling)
self.logitScale = try container.decode(
Float.self, forKey: CohereConfiguration.CodingKeys.logitScale)
}
}

// MARK: - LoRA
93 changes: 17 additions & 76 deletions Libraries/MLXLLM/Models/GLM4.swift
@@ -9,6 +9,7 @@ import Foundation
import MLX
import MLXLMCommon
import MLXNN
import ReerCodable

// port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/glm4.py

@@ -164,15 +165,13 @@ public class GLM4Model: Module, LLMModel, KVCacheDimensionProvider {

private let model: GLM4ModelInner
let configuration: GLM4Configuration
let modelType: String

@ModuleInfo(key: "lm_head") var lmHead: Linear

public init(_ args: GLM4Configuration) {
self.configuration = args
self.vocabularySize = args.vocabularySize
self.kvHeads = (0 ..< args.hiddenLayers).map { _ in args.kvHeads }
self.modelType = args.modelType
self.model = GLM4ModelInner(args)

_lmHead.wrappedValue = Linear(args.hiddenSize, args.vocabularySize, bias: false)
@@ -194,80 +193,22 @@ public class GLM4Model: Module, LLMModel, KVCacheDimensionProvider {
}
}

public struct GLM4Configuration: Codable, Sendable {
var hiddenSize: Int
var hiddenLayers: Int
var intermediateSize: Int
var attentionHeads: Int
var attentionBias: Bool
var headDim: Int
var rmsNormEps: Float
var vocabularySize: Int
var kvHeads: Int
var partialRotaryFactor: Float
var ropeTheta: Float = 10000.0
var ropeTraditional: Bool = true
var tieWordEmbeddings = false
var maxPositionEmbeddings: Int = 32768
var modelType: String

enum CodingKeys: String, CodingKey {
case hiddenSize = "hidden_size"
case hiddenLayers = "num_hidden_layers"
case intermediateSize = "intermediate_size"
case attentionHeads = "num_attention_heads"
case attentionBias = "attention_bias"
case headDim = "head_dim"
case rmsNormEps = "rms_norm_eps"
case vocabularySize = "vocab_size"
case kvHeads = "num_key_value_heads"
case partialRotaryFactor = "partial_rotary_factor"
case ropeTheta = "rope_theta"
case ropeTraditional = "rope_traditional"
case tieWordEmbeddings = "tie_word_embeddings"
case maxPositionEmbeddings = "max_position_embeddings"
case modelType = "model_type"
}

public init(from decoder: Decoder) throws {
let container: KeyedDecodingContainer<GLM4Configuration.CodingKeys> =
try decoder.container(
keyedBy: GLM4Configuration.CodingKeys.self)

self.modelType = try container.decode(
String.self, forKey: GLM4Configuration.CodingKeys.modelType)
self.hiddenSize = try container.decode(
Int.self, forKey: GLM4Configuration.CodingKeys.hiddenSize)
self.hiddenLayers = try container.decode(
Int.self, forKey: GLM4Configuration.CodingKeys.hiddenLayers)
self.intermediateSize = try container.decode(
Int.self, forKey: GLM4Configuration.CodingKeys.intermediateSize)
self.attentionHeads = try container.decode(
Int.self, forKey: GLM4Configuration.CodingKeys.attentionHeads)
self.attentionBias = try container.decode(
Bool.self, forKey: GLM4Configuration.CodingKeys.attentionBias)
self.headDim = try container.decode(
Int.self, forKey: GLM4Configuration.CodingKeys.headDim)
self.rmsNormEps = try container.decode(
Float.self, forKey: GLM4Configuration.CodingKeys.rmsNormEps)
self.vocabularySize = try container.decode(
Int.self, forKey: GLM4Configuration.CodingKeys.vocabularySize)
self.kvHeads = try container.decode(Int.self, forKey: GLM4Configuration.CodingKeys.kvHeads)
self.partialRotaryFactor = try container.decode(
Float.self, forKey: GLM4Configuration.CodingKeys.partialRotaryFactor)
self.ropeTheta =
try container.decodeIfPresent(
Float.self, forKey: GLM4Configuration.CodingKeys.ropeTheta)
?? 10000.0
self.ropeTraditional =
try container.decodeIfPresent(
Bool.self, forKey: GLM4Configuration.CodingKeys.ropeTraditional)
?? true
self.tieWordEmbeddings =
try container.decodeIfPresent(Bool.self, forKey: .tieWordEmbeddings) ?? false
self.maxPositionEmbeddings =
try container.decodeIfPresent(Int.self, forKey: .maxPositionEmbeddings) ?? 32768
}
@Codable
public struct GLM4Configuration: Sendable {
@CodingKey("hidden_size") public var hiddenSize: Int
@CodingKey("num_hidden_layers") public var hiddenLayers: Int
@CodingKey("intermediate_size") public var intermediateSize: Int
@CodingKey("num_attention_heads") public var attentionHeads: Int
@CodingKey("attention_bias") public var attentionBias: Bool
@CodingKey("head_dim") public var headDim: Int
@CodingKey("rms_norm_eps") public var rmsNormEps: Float
@CodingKey("vocab_size") public var vocabularySize: Int
@CodingKey("num_key_value_heads") public var kvHeads: Int
@CodingKey("partial_rotary_factor") public var partialRotaryFactor: Float
@CodingKey("rope_theta") public var ropeTheta: Float = 10000.0
@CodingKey("rope_traditional") public var ropeTraditional: Bool = true
@CodingKey("tie_word_embeddings") public var tieWordEmbeddings = false
@CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 32768
}

// MARK: - LoRA
45 changes: 14 additions & 31 deletions Libraries/MLXLLM/Models/Gemma.swift
@@ -4,9 +4,10 @@ import Foundation
import MLX
import MLXLMCommon
import MLXNN
import ReerCodable
import Tokenizers

// Port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/gemma.py
// Port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/gemma.py

// Specialized norm for Gemma
private class RMSNorm: Module, UnaryLayer {
@@ -174,11 +175,9 @@ public class GemmaModel: Module, LLMModel, KVCacheDimensionProvider {
public let vocabularySize: Int
public let kvHeads: [Int]

let modelType: String
private let model: GemmaModelInner

public init(_ args: GemmaConfiguration) {
self.modelType = args.modelType
self.vocabularySize = args.vocabularySize
self.kvHeads = Array(repeating: args.kvHeads, count: args.hiddenLayers)
self.model = GemmaModelInner(args)
@@ -194,34 +193,18 @@ public class GemmaModel: Module, LLMModel, KVCacheDimensionProvider {
}
}

public struct GemmaConfiguration: Codable, Sendable {
var modelType: String
var hiddenSize: Int
var hiddenLayers: Int
var intermediateSize: Int
var attentionHeads: Int
var headDimensions: Int
var rmsNormEps: Float
var vocabularySize: Int
var kvHeads: Int
private let _ropeTheta: Float?
public var ropeTheta: Float { _ropeTheta ?? 10_000 }
private let _ropeTraditional: Bool?
public var ropeTraditional: Bool { _ropeTraditional ?? false }

enum CodingKeys: String, CodingKey {
case modelType = "model_type"
case hiddenSize = "hidden_size"
case hiddenLayers = "num_hidden_layers"
case intermediateSize = "intermediate_size"
case attentionHeads = "num_attention_heads"
case headDimensions = "head_dim"
case rmsNormEps = "rms_norm_eps"
case vocabularySize = "vocab_size"
case kvHeads = "num_key_value_heads"
case _ropeTheta = "rope_theta"
case _ropeTraditional = "rope_traditional"
}
@Codable
public struct GemmaConfiguration: Sendable {
@CodingKey("hidden_size") public var hiddenSize: Int
@CodingKey("num_hidden_layers") public var hiddenLayers: Int
@CodingKey("intermediate_size") public var intermediateSize: Int
@CodingKey("num_attention_heads") public var attentionHeads: Int
@CodingKey("head_dim") public var headDimensions: Int
@CodingKey("rms_norm_eps") public var rmsNormEps: Float
@CodingKey("vocab_size") public var vocabularySize: Int
@CodingKey("num_key_value_heads") public var kvHeads: Int
@CodingKey("rope_theta") public var ropeTheta: Float = 10_000
@CodingKey("rope_traditional") public var ropeTraditional: Bool = false
}

// MARK: - LoRA