diff --git a/.circleci/config.yml b/.circleci/config.yml index 5a97f48a..8ffa27c1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -38,7 +38,7 @@ jobs: xcrun --show-sdk-build-version swift --version find . -name Package.resolved -exec rm {} \; - xcodebuild test -scheme mlx-libraries-Package -destination 'platform=OS X' + xcodebuild test -scheme mlx-libraries-Package -destination 'platform=OS X' -skipMacroValidation - run: name: Build Examples command: | @@ -46,9 +46,9 @@ jobs: xcrun --show-sdk-build-version swift --version find . -name Package.resolved -exec rm {} \; - xcodebuild -scheme llm-tool - xcodebuild -scheme image-tool - xcodebuild -scheme mnist-tool + xcodebuild -scheme llm-tool -skipMacroValidation + xcodebuild -scheme image-tool -skipMacroValidation + xcodebuild -scheme mnist-tool -skipMacroValidation workflows: build_and_test: diff --git a/Libraries/Embedders/Pooling.swift b/Libraries/Embedders/Pooling.swift index 912f37b8..c8c65074 100644 --- a/Libraries/Embedders/Pooling.swift +++ b/Libraries/Embedders/Pooling.swift @@ -5,7 +5,7 @@ import MLX import MLXLinalg import MLXNN -public struct PoolingConfiguration: Codable { +public struct PoolingConfiguration: Codable, Sendable { public let dimension: Int public let poolingModeClsToken: Bool public let poolingModeMeanTokens: Bool diff --git a/Libraries/MLXLLM/Lora+Data.swift b/Libraries/MLXLLM/Lora+Data.swift index 975e41f4..defa8f5a 100644 --- a/Libraries/MLXLLM/Lora+Data.swift +++ b/Libraries/MLXLLM/Lora+Data.swift @@ -48,7 +48,7 @@ public func loadLoRAData(url: URL) throws -> [String] { func loadJSONL(url: URL) throws -> [String] { - struct Line: Codable { + struct Line: Codable, Sendable { let text: String? } diff --git a/Libraries/MLXLLM/Models/Cohere.swift b/Libraries/MLXLLM/Models/Cohere.swift index 5a8c1bed..647d358f 100644 --- a/Libraries/MLXLLM/Models/Cohere.swift +++ b/Libraries/MLXLLM/Models/Cohere.swift @@ -2,8 +2,9 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable -// port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/cohere.py +// port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/cohere.py private class Attention: Module { @@ -168,63 +169,21 @@ public class CohereModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct CohereConfiguration: Codable, Sendable { - - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var layerNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var ropeTheta: Float = 8000000.0 - var ropeTraditional: Bool = true - var ropeScaling: [String: StringOrNumber]? 
= nil
-    var logitScale: Float
-
-    enum CodingKeys: String, CodingKey {
-        case hiddenSize = "hidden_size"
-        case hiddenLayers = "num_hidden_layers"
-        case intermediateSize = "intermediate_size"
-        case attentionHeads = "num_attention_heads"
-        case kvHeads = "num_key_value_heads"
-        case ropeTheta = "rope_theta"
-        case vocabularySize = "vocab_size"
-        case layerNormEps = "layer_norm_eps"
-        case logitScale = "logit_scale"
-        case ropeTraditional = "rope_traditional"
-        case ropeScaling = "rope_scaling"
-    }
+@Codable
+public struct CohereConfiguration: Sendable {
+
+    @CodingKey("hidden_size") public var hiddenSize: Int = 8192
+    @CodingKey("num_hidden_layers") public var hiddenLayers: Int = 40
+    @CodingKey("intermediate_size") public var intermediateSize: Int = 22528
+    @CodingKey("num_attention_heads") public var attentionHeads: Int = 64
+    @CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-5
+    @CodingKey("vocab_size") public var vocabularySize: Int = 256000
+    @CodingKey("num_key_value_heads") public var kvHeads: Int = 64
+    @CodingKey("rope_theta") public var ropeTheta: Float = 8000000.0
+    @CodingKey("rope_traditional") public var ropeTraditional: Bool = true
+    @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? = nil
+    @CodingKey("logit_scale") public var logitScale: Float = 0.0625
-
-    public init(from decoder: Decoder) throws {
-        // custom implementation to handle optional keys with required values
-        let container: KeyedDecodingContainer =
-            try decoder.container(
-                keyedBy: CohereConfiguration.CodingKeys.self)
-
-        self.hiddenSize = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.hiddenSize)
-        self.hiddenLayers = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.hiddenLayers)
-        self.intermediateSize = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.intermediateSize)
-        self.attentionHeads = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.attentionHeads)
-        self.layerNormEps = try container.decode(
-            Float.self, forKey: CohereConfiguration.CodingKeys.layerNormEps)
-        self.vocabularySize = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.vocabularySize)
-        self.kvHeads = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.kvHeads)
-        self.ropeTheta =
-            try container.decodeIfPresent(
-                Float.self, forKey: CohereConfiguration.CodingKeys.ropeTheta)
-            ?? 
8000000.0 - self.ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: CohereConfiguration.CodingKeys.ropeScaling) - self.logitScale = try container.decode( - Float.self, forKey: CohereConfiguration.CodingKeys.logitScale) - } } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/GLM4.swift b/Libraries/MLXLLM/Models/GLM4.swift index 919550ba..46f865d9 100644 --- a/Libraries/MLXLLM/Models/GLM4.swift +++ b/Libraries/MLXLLM/Models/GLM4.swift @@ -9,6 +9,7 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable // port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/glm4.py @@ -164,7 +165,6 @@ public class GLM4Model: Module, LLMModel, KVCacheDimensionProvider { private let model: GLM4ModelInner let configuration: GLM4Configuration - let modelType: String @ModuleInfo(key: "lm_head") var lmHead: Linear @@ -172,7 +172,6 @@ public class GLM4Model: Module, LLMModel, KVCacheDimensionProvider { self.configuration = args self.vocabularySize = args.vocabularySize self.kvHeads = (0 ..< args.hiddenLayers).map { _ in args.kvHeads } - self.modelType = args.modelType self.model = GLM4ModelInner(args) _lmHead.wrappedValue = Linear(args.hiddenSize, args.vocabularySize, bias: false) @@ -194,80 +193,22 @@ public class GLM4Model: Module, LLMModel, KVCacheDimensionProvider { } } -public struct GLM4Configuration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var attentionBias: Bool - var headDim: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var partialRotaryFactor: Float - var ropeTheta: Float = 10000.0 - var ropeTraditional: Bool = true - var tieWordEmbeddings = false - var maxPositionEmbeddings: Int = 32768 - var modelType: String - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case attentionBias = "attention_bias" - case headDim = "head_dim" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case partialRotaryFactor = "partial_rotary_factor" - case ropeTheta = "rope_theta" - case ropeTraditional = "rope_traditional" - case tieWordEmbeddings = "tie_word_embeddings" - case maxPositionEmbeddings = "max_position_embeddings" - case modelType = "model_type" - } - - public init(from decoder: Decoder) throws { - let container: KeyedDecodingContainer = - try decoder.container( - keyedBy: GLM4Configuration.CodingKeys.self) - - self.modelType = try container.decode( - String.self, forKey: GLM4Configuration.CodingKeys.modelType) - self.hiddenSize = try container.decode( - Int.self, forKey: GLM4Configuration.CodingKeys.hiddenSize) - self.hiddenLayers = try container.decode( - Int.self, forKey: GLM4Configuration.CodingKeys.hiddenLayers) - self.intermediateSize = try container.decode( - Int.self, forKey: GLM4Configuration.CodingKeys.intermediateSize) - self.attentionHeads = try container.decode( - Int.self, forKey: GLM4Configuration.CodingKeys.attentionHeads) - self.attentionBias = try container.decode( - Bool.self, forKey: GLM4Configuration.CodingKeys.attentionBias) - self.headDim = try container.decode( - Int.self, forKey: GLM4Configuration.CodingKeys.headDim) - self.rmsNormEps = try container.decode( - Float.self, forKey: GLM4Configuration.CodingKeys.rmsNormEps) - self.vocabularySize = try container.decode( - Int.self, 
forKey: GLM4Configuration.CodingKeys.vocabularySize) - self.kvHeads = try container.decode(Int.self, forKey: GLM4Configuration.CodingKeys.kvHeads) - self.partialRotaryFactor = try container.decode( - Float.self, forKey: GLM4Configuration.CodingKeys.partialRotaryFactor) - self.ropeTheta = - try container.decodeIfPresent( - Float.self, forKey: GLM4Configuration.CodingKeys.ropeTheta) - ?? 10000.0 - self.ropeTraditional = - try container.decodeIfPresent( - Bool.self, forKey: GLM4Configuration.CodingKeys.ropeTraditional) - ?? true - self.tieWordEmbeddings = - try container.decodeIfPresent(Bool.self, forKey: .tieWordEmbeddings) ?? false - self.maxPositionEmbeddings = - try container.decodeIfPresent(Int.self, forKey: .maxPositionEmbeddings) ?? 32768 - } +@Codable +public struct GLM4Configuration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("attention_bias") public var attentionBias: Bool + @CodingKey("head_dim") public var headDim: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("partial_rotary_factor") public var partialRotaryFactor: Float + @CodingKey("rope_theta") public var ropeTheta: Float = 10000.0 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = true + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings = false + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 32768 } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Gemma.swift b/Libraries/MLXLLM/Models/Gemma.swift index 1818456e..126d3b44 100644 --- a/Libraries/MLXLLM/Models/Gemma.swift +++ b/Libraries/MLXLLM/Models/Gemma.swift @@ -4,9 +4,10 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable import Tokenizers -// Port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/gemma.py +// Port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/gemma.py // Specialized norm for Gemma private class RMSNorm: Module, UnaryLayer { @@ -174,11 +175,9 @@ public class GemmaModel: Module, LLMModel, KVCacheDimensionProvider { public let vocabularySize: Int public let kvHeads: [Int] - let modelType: String private let model: GemmaModelInner public init(_ args: GemmaConfiguration) { - self.modelType = args.modelType self.vocabularySize = args.vocabularySize self.kvHeads = Array(repeating: args.kvHeads, count: args.hiddenLayers) self.model = GemmaModelInner(args) @@ -194,34 +193,18 @@ public class GemmaModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct GemmaConfiguration: Codable, Sendable { - var modelType: String - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var headDimensions: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - private let _ropeTheta: Float? - public var ropeTheta: Float { _ropeTheta ?? 10_000 } - private let _ropeTraditional: Bool? - public var ropeTraditional: Bool { _ropeTraditional ?? 
false } - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case headDimensions = "head_dim" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case _ropeTheta = "rope_theta" - case _ropeTraditional = "rope_traditional" - } +@Codable +public struct GemmaConfiguration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("head_dim") public var headDimensions: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("rope_theta") public var ropeTheta: Float = 10_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Gemma2.swift b/Libraries/MLXLLM/Models/Gemma2.swift index 561477c1..bf1ba415 100644 --- a/Libraries/MLXLLM/Models/Gemma2.swift +++ b/Libraries/MLXLLM/Models/Gemma2.swift @@ -4,9 +4,10 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable import Tokenizers -// Port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/gemma2.py +// Port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/gemma2.py private class Attention: Module { let args: Gemma2Configuration @@ -203,70 +204,21 @@ public class Gemma2Model: Module, LLMModel, KVCacheDimensionProvider { } } -public struct Gemma2Configuration: Codable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var headDimensions: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var ropeTheta: Float = 10_000 - var ropeTraditional: Bool = false - var attnLogitSoftcapping: Float = 50.0 - var finalLogitSoftcapping: Float = 30.0 - var queryPreAttnScalar: Float = 144.0 - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case headDimensions = "head_dim" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case ropeTheta = "rope_theta" - case ropeTraditional = "rope_traditional" - case attnLogitSoftcapping = "attn_logit_softcapping" - case finalLogitSoftcapping = "final_logit_softcapping" - case queryPreAttnScalar = "query_pre_attn_scalar" - } - - public init(from decoder: Swift.Decoder) throws { - // Custom implementation to handle optional keys with required values - let container: KeyedDecodingContainer = try decoder.container( - keyedBy: CodingKeys.self) - - self.hiddenSize = try container.decode( - Int.self, forKey: CodingKeys.hiddenSize) - self.hiddenLayers = try container.decode( - Int.self, forKey: CodingKeys.hiddenLayers) - self.intermediateSize = try container.decode( - Int.self, forKey: CodingKeys.intermediateSize) - self.attentionHeads = try container.decode( - Int.self, forKey: CodingKeys.attentionHeads) - self.headDimensions = try container.decode( - Int.self, forKey: CodingKeys.headDimensions) - 
self.rmsNormEps = try container.decode( - Float.self, forKey: CodingKeys.rmsNormEps) - self.vocabularySize = try container.decode( - Int.self, forKey: CodingKeys.vocabularySize) - self.kvHeads = try container.decode(Int.self, forKey: CodingKeys.kvHeads) - self.ropeTheta = - try container.decodeIfPresent(Float.self, forKey: CodingKeys.ropeTheta) - ?? 10_000 - self.ropeTraditional = - try container.decodeIfPresent( - Bool.self, forKey: CodingKeys.ropeTraditional) ?? false - self.attnLogitSoftcapping = try container.decode( - Float.self, forKey: CodingKeys.attnLogitSoftcapping) - self.finalLogitSoftcapping = try container.decode( - Float.self, forKey: CodingKeys.finalLogitSoftcapping) - self.queryPreAttnScalar = try container.decode( - Float.self, forKey: CodingKeys.queryPreAttnScalar) - } +@Codable +public struct Gemma2Configuration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("head_dim") public var headDimensions: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("rope_theta") public var ropeTheta: Float = 10_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("attn_logit_softcapping") public var attnLogitSoftcapping: Float = 50.0 + @CodingKey("final_logit_softcapping") public var finalLogitSoftcapping: Float = 30.0 + @CodingKey("query_pre_attn_scalar") public var queryPreAttnScalar: Float = 144.0 } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Granite.swift b/Libraries/MLXLLM/Models/Granite.swift index f252f9c3..b222e52d 100644 --- a/Libraries/MLXLLM/Models/Granite.swift +++ b/Libraries/MLXLLM/Models/Granite.swift @@ -5,12 +5,13 @@ // Created by Sachin Desai on 4/25/25. // -// Port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/granite.py - import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable + +// Port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/granite.py private class Attention: Module { let args: GraniteConfiguration @@ -210,69 +211,25 @@ public class GraniteModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct GraniteConfiguration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var rmsNormEps: Float - var vocabularySize: Int - var logitsScaling: Float - var attentionMultiplier: Float - var embeddingMultiplier: Float - var residualMultiplier: Float - var maxPositionEmbeddings: Int - var kvHeads: Int - var attentionBias: Bool - var mlpBias: Bool - var ropeTheta: Float - var ropeTraditional: Bool = false - var ropeScaling: [String: StringOrNumber]? 
= nil - var tieWordEmbeddings: Bool = true - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case logitsScaling = "logits_scaling" - case attentionMultiplier = "attention_multiplier" - case embeddingMultiplier = "embedding_multiplier" - case residualMultiplier = "residual_multiplier" - case maxPositionEmbeddings = "max_position_embeddings" - case kvHeads = "num_key_value_heads" - case attentionBias = "attention_bias" - case mlpBias = "mlp_bias" - case ropeTheta = "rope_theta" - case ropeScaling = "rope_scaling" - case tieWordEmbeddings = "tie_word_embeddings" - } - - public init(from decoder: Decoder) throws { - let container: KeyedDecodingContainer = - try decoder.container(keyedBy: GraniteConfiguration.CodingKeys.self) - - self.hiddenSize = try container.decode(Int.self, forKey: .hiddenSize) - self.hiddenLayers = try container.decode(Int.self, forKey: .hiddenLayers) - self.intermediateSize = try container.decode(Int.self, forKey: .intermediateSize) - self.attentionHeads = try container.decode(Int.self, forKey: .attentionHeads) - self.rmsNormEps = try container.decode(Float.self, forKey: .rmsNormEps) - self.vocabularySize = try container.decode(Int.self, forKey: .vocabularySize) - self.logitsScaling = try container.decode(Float.self, forKey: .logitsScaling) - self.attentionMultiplier = try container.decode(Float.self, forKey: .attentionMultiplier) - self.embeddingMultiplier = try container.decode(Float.self, forKey: .embeddingMultiplier) - self.residualMultiplier = try container.decode(Float.self, forKey: .residualMultiplier) - self.maxPositionEmbeddings = try container.decode(Int.self, forKey: .maxPositionEmbeddings) - self.kvHeads = try container.decode(Int.self, forKey: .kvHeads) - self.attentionBias = try container.decode(Bool.self, forKey: .attentionBias) - self.mlpBias = try container.decode(Bool.self, forKey: .mlpBias) ?? false - self.ropeTheta = try container.decodeIfPresent(Float.self, forKey: .ropeTheta) ?? 10000000.0 - self.ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: .ropeScaling) - self.tieWordEmbeddings = try container.decode(Bool.self, forKey: .tieWordEmbeddings) - } +@Codable +public struct GraniteConfiguration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("logits_scaling") public var logitsScaling: Float + @CodingKey("attention_multiplier") public var attentionMultiplier: Float + @CodingKey("embedding_multiplier") public var embeddingMultiplier: Float + @CodingKey("residual_multiplier") public var residualMultiplier: Float + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("attention_bias") public var attentionBias: Bool + @CodingKey("mlp_bias") public var mlpBias: Bool + @CodingKey("rope_theta") public var ropeTheta: Float + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? 
= nil + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = true } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Internlm2.swift b/Libraries/MLXLLM/Models/Internlm2.swift index 8e1057bc..08f7d832 100644 --- a/Libraries/MLXLLM/Models/Internlm2.swift +++ b/Libraries/MLXLLM/Models/Internlm2.swift @@ -4,6 +4,7 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable // Port of https://github.com/maiqingqiang/mlx-examples/blob/main/llms/mlx_lm/models/internlm2.py @@ -236,76 +237,36 @@ extension InternLM2Model: LoRAModel { } } -public struct InternLM2Configuration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var maxPositionEmbeddings: Int = 32768 - var ropeTheta: Float = 10000 - var ropeTraditional: Bool = false - var ropeScaling: [String: StringOrNumber]? - var tieWordEmbeddings: Bool = false - var bias: Bool = true +@Codable +public struct InternLM2Configuration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 32768 + @CodingKey("rope_theta") public var ropeTheta: Float = 10000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = false + @CodingKey("bias") public var bias: Bool = true var kvGroups: Int { attentionHeads / kvHeads } - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case maxPositionEmbeddings = "max_position_embeddings" - case ropeTheta = "rope_theta" - case ropeTraditional = "rope_traditional" - case ropeScaling = "rope_scaling" - case tieWordEmbeddings = "tie_word_embeddings" - case bias = "bias" - } - - public init(from decoder: Decoder) throws { - let container = try decoder.container(keyedBy: CodingKeys.self) - - hiddenSize = try container.decode(Int.self, forKey: .hiddenSize) - hiddenLayers = try container.decode(Int.self, forKey: .hiddenLayers) - intermediateSize = try container.decode(Int.self, forKey: .intermediateSize) - attentionHeads = try container.decode(Int.self, forKey: .attentionHeads) - rmsNormEps = try container.decode(Float.self, forKey: .rmsNormEps) - vocabularySize = try container.decode(Int.self, forKey: .vocabularySize) - kvHeads = try container.decodeIfPresent(Int.self, forKey: .kvHeads) ?? 
attentionHeads - maxPositionEmbeddings = try container.decode(Int.self, forKey: .maxPositionEmbeddings) - if let ropeTheta = try container.decodeIfPresent(Float.self, forKey: .ropeTheta) { - self.ropeTheta = ropeTheta - } - if let ropeTraditional = try container.decodeIfPresent(Bool.self, forKey: .ropeTraditional) - { - self.ropeTraditional = ropeTraditional - } - ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: .ropeScaling) - if let tieWordEmbeddings = try container.decodeIfPresent( - Bool.self, forKey: .tieWordEmbeddings) - { - self.tieWordEmbeddings = tieWordEmbeddings - } - if let bias = try container.decodeIfPresent(Bool.self, forKey: .bias) { - self.bias = bias - } + public func didDecode(from decoder: any Decoder) throws { + let container = try decoder.container(keyedBy: AnyCodingKey.self) + let codingKey = AnyCodingKey("rope_scaling") if let ropeScaling { let requiredKeys: Set = ["factor", "type"] let keys = Set(ropeScaling.keys) if !requiredKeys.isSubset(of: keys) { throw DecodingError.dataCorruptedError( - forKey: .ropeScaling, in: container, + forKey: codingKey, in: container, debugDescription: "rope_scaling must contain keys \(requiredKeys)" ) } @@ -313,7 +274,7 @@ public struct InternLM2Configuration: Codable, Sendable { type != .string("linear") && type != .string("dynamic") { throw DecodingError.dataCorruptedError( - forKey: .ropeScaling, in: container, + forKey: codingKey, in: container, debugDescription: "rope_scaling 'type' currently only supports 'linear' or 'dynamic'" ) diff --git a/Libraries/MLXLLM/Models/Llama.swift b/Libraries/MLXLLM/Models/Llama.swift index ab857234..4fef7ddc 100644 --- a/Libraries/MLXLLM/Models/Llama.swift +++ b/Libraries/MLXLLM/Models/Llama.swift @@ -4,9 +4,10 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable import Tokenizers -// port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/llama.py +// port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/llama.py func computeBaseFrequency( base: Float, dims: Int, ropeType: String, ropeScaling: [String: StringOrNumber]? @@ -336,23 +337,24 @@ public class LlamaModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct LlamaConfiguration: Codable, Sendable { - - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var headDimensions: Int? - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var maxPositionEmbeddings: Int? - var ropeTheta: Float = 10_000 - var ropeTraditional: Bool = false - var ropeScaling: [String: StringOrNumber]? - var tieWordEmbeddings: Bool = true - var attentionBias: Bool = false - var mlpBias: Bool = false +@Codable +public struct LlamaConfiguration: Sendable { + + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("head_dim") public var headDimensions: Int? + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int? 
+ @CodingKey("rope_theta") public var ropeTheta: Float = 10_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = true + @CodingKey("attention_bias") public var attentionBias: Bool = false + @CodingKey("mlp_bias") public var mlpBias: Bool = false public init( hiddenSize: Int, hiddenLayers: Int, intermediateSize: Int, attentionHeads: Int, @@ -382,62 +384,14 @@ public struct LlamaConfiguration: Codable, Sendable { headDimensions ?? (hiddenSize / attentionHeads) } - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case headDimensions = "head_dim" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case maxPositionEmbeddings = "max_position_embeddings" - case ropeTheta = "rope_theta" - case ropeTraditional = "rope_traditional" - case ropeScaling = "rope_scaling" - case tieWordEmbeddings = "tie_word_embeddings" - case attentionBias = "attention_bias" - case mlpBias = "mlp_bias" - } - - public init(from decoder: Swift.Decoder) throws { - let container = try decoder.container(keyedBy: CodingKeys.self) - - hiddenSize = try container.decode(Int.self, forKey: .hiddenSize) - hiddenLayers = try container.decode(Int.self, forKey: .hiddenLayers) - intermediateSize = try container.decode(Int.self, forKey: .intermediateSize) - attentionHeads = try container.decode(Int.self, forKey: .attentionHeads) - headDimensions = try container.decodeIfPresent(Int.self, forKey: .headDimensions) - rmsNormEps = try container.decode(Float.self, forKey: .rmsNormEps) - vocabularySize = try container.decode(Int.self, forKey: .vocabularySize) - kvHeads = try container.decodeIfPresent(Int.self, forKey: .kvHeads) ?? attentionHeads - maxPositionEmbeddings = try container.decodeIfPresent( - Int.self, forKey: .maxPositionEmbeddings) - if let ropeTheta = try container.decodeIfPresent(Float.self, forKey: .ropeTheta) { - self.ropeTheta = ropeTheta - } - if let ropeTraditional = try container.decodeIfPresent(Bool.self, forKey: .ropeTraditional) - { - self.ropeTraditional = ropeTraditional - } - ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: .ropeScaling) - if let tieWordEmbeddings = try container.decodeIfPresent( - Bool.self, forKey: .tieWordEmbeddings) - { - self.tieWordEmbeddings = tieWordEmbeddings - } - if let attentionBias = try container.decodeIfPresent(Bool.self, forKey: .attentionBias) { - self.attentionBias = attentionBias - } - if let mlpBias = try container.decodeIfPresent(Bool.self, forKey: .mlpBias) { - self.mlpBias = mlpBias - } + public func didDecode(from decoder: any Decoder) throws { + let container = try decoder.container(keyedBy: AnyCodingKey.self) + let codingKey = AnyCodingKey("rope_scaling") if let ropeScaling { if ropeScaling["factor"] == nil { throw DecodingError.dataCorruptedError( - forKey: .ropeScaling, in: container, + forKey: codingKey, in: container, debugDescription: "rope_scaling must contain 'factor'") } if let ropeType = ropeScaling["type"] ?? 
ropeScaling["rope_type"] { @@ -448,7 +402,7 @@ public struct LlamaConfiguration: Codable, Sendable { ] if !options.contains(ropeType) { throw DecodingError.dataCorruptedError( - forKey: .ropeScaling, in: container, + forKey: codingKey, in: container, debugDescription: "rope_scaling 'type' currently only supports 'linear', 'dynamic', or 'llama3'" ) @@ -456,7 +410,7 @@ public struct LlamaConfiguration: Codable, Sendable { } } else { throw DecodingError.dataCorruptedError( - forKey: .ropeScaling, in: container, + forKey: codingKey, in: container, debugDescription: "rope_scaling must contain either 'type' or 'rope_type'") } } diff --git a/Libraries/MLXLLM/Models/MiMo.swift b/Libraries/MLXLLM/Models/MiMo.swift index de67977a..67db6946 100644 --- a/Libraries/MLXLLM/Models/MiMo.swift +++ b/Libraries/MLXLLM/Models/MiMo.swift @@ -9,6 +9,7 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable private class Attention: Module { let args: MiMoConfiguration @@ -208,59 +209,21 @@ public class MiMoModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct MiMoConfiguration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var maxPositionEmbeddings: Int - var ropeTheta: Float - var ropeTraditional: Bool - var ropeScaling: [String: StringOrNumber]? - var tieWordEmbeddings: Bool - var numNextnPredictLayers: Int - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case maxPositionEmbeddings = "max_position_embeddings" - case ropeTheta = "rope_theta" - case ropeTraditional = "rope_traditional" - case ropeScaling = "rope_scaling" - case tieWordEmbeddings = "tie_word_embeddings" - case numNextnPredictLayers = "num_nextn_predict_layers" - } - - public init(from decoder: Decoder) throws { - let container = try decoder.container(keyedBy: CodingKeys.self) - - self.hiddenSize = try container.decode(Int.self, forKey: .hiddenSize) - self.hiddenLayers = try container.decode(Int.self, forKey: .hiddenLayers) - self.intermediateSize = try container.decode(Int.self, forKey: .intermediateSize) - self.attentionHeads = try container.decode(Int.self, forKey: .attentionHeads) - self.rmsNormEps = try container.decode(Float.self, forKey: .rmsNormEps) - self.vocabularySize = try container.decode(Int.self, forKey: .vocabularySize) - self.kvHeads = try container.decode(Int.self, forKey: .kvHeads) - self.maxPositionEmbeddings = - try container.decodeIfPresent(Int.self, forKey: .maxPositionEmbeddings) ?? 32768 - self.ropeTheta = try container.decodeIfPresent(Float.self, forKey: .ropeTheta) ?? 10000.0 - self.ropeTraditional = - try container.decodeIfPresent(Bool.self, forKey: .ropeTraditional) ?? false - self.ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: .ropeScaling) - self.tieWordEmbeddings = - try container.decodeIfPresent(Bool.self, forKey: .tieWordEmbeddings) ?? false - self.numNextnPredictLayers = - try container.decodeIfPresent(Int.self, forKey: .numNextnPredictLayers) ?? 
2 - } +@Codable +public struct MiMoConfiguration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 32768 + @CodingKey("rope_theta") public var ropeTheta: Float = 10000.0 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? = nil + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = false + @CodingKey("num_nextn_predict_layers") public var numNextnPredictLayers: Int = 2 } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/OpenELM.swift b/Libraries/MLXLLM/Models/OpenELM.swift index 1d67ac55..291d0814 100644 --- a/Libraries/MLXLLM/Models/OpenELM.swift +++ b/Libraries/MLXLLM/Models/OpenELM.swift @@ -9,6 +9,7 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable func computeHeads(modelDim: Int, headDim: Int) -> Int { assert(modelDim % headDim == 0, "modelDim must be divisible by headDim") @@ -205,58 +206,26 @@ public class OpenELMModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct OpenElmConfiguration: Codable, Sendable { - var modelType: String - var headDimensions: Int - var numTransformerLayers: Int - var modelDim: Int - var vocabularySize: Int - var ffnDimDivisor: Int - var numQueryHeads: [Int] = [] - var kvHeads: [Int] = [] - var ffnWithGlu: Bool = true - var normalizeQkProjections: Bool = true - var shareInputOutputLayers: Bool = true - var rmsNormEps: Float = 1e-6 - var ropeTheta: Float = 10_000 - var ropeTraditional: Bool = false - var numGqaGroups: Int = 4 - var ffnMultipliers: [Float] = [0.5, 4.0] - var qkvMultiplier: [Float] = [0.5, 1.0] - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case headDimensions = "head_dim" - case numTransformerLayers = "num_transformer_layers" - case modelDim = "model_dim" - case vocabularySize = "vocab_size" - case ffnDimDivisor = "ffn_dim_divisor" - case ffnMultipliers = "ffn_multipliers" - case ffnWithGlu = "ffn_with_glu" - case normalizeQkProjections = "normalize_qk_projections" - case shareInputOutputLayers = "share_input_output_layers" - } - - public init(from decoder: Decoder) throws { - // custom implementation to handle optional keys with required values - let container: KeyedDecodingContainer = - try decoder.container( - keyedBy: OpenElmConfiguration.CodingKeys.self) - - self.modelType = try container.decode( - String.self, forKey: OpenElmConfiguration.CodingKeys.modelType) - self.headDimensions = try container.decode( - Int.self, forKey: OpenElmConfiguration.CodingKeys.headDimensions) - self.numTransformerLayers = try container.decode( - Int.self, forKey: OpenElmConfiguration.CodingKeys.numTransformerLayers) - - self.modelDim = try container.decode( - Int.self, forKey: OpenElmConfiguration.CodingKeys.modelDim) - self.vocabularySize = try container.decode( - Int.self, forKey: OpenElmConfiguration.CodingKeys.vocabularySize) - self.ffnDimDivisor = try container.decode( - Int.self, forKey: OpenElmConfiguration.CodingKeys.ffnDimDivisor) - +@Codable +public struct OpenElmConfiguration: Sendable { 
+ @CodingKey("head_dim") public var headDimensions: Int + @CodingKey("num_transformer_layers") public var numTransformerLayers: Int + @CodingKey("model_dim") public var modelDim: Int + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("ffn_dim_divisor") public var ffnDimDivisor: Int + @CodingKey("ffn_multipliers") public var ffnMultipliers: [Float] = [0.5, 4.0] + @CodingKey("ffn_with_glu") public var ffnWithGlu: Bool = true + @CodingKey("normalize_qk_projections") public var normalizeQkProjections: Bool = true + @CodingKey("share_input_output_layers") public var shareInputOutputLayers: Bool = true + @CodingIgnored public var numQueryHeads: [Int] = [] + @CodingIgnored public var kvHeads: [Int] = [] + @CodingIgnored public var rmsNormEps: Float = 1e-6 + @CodingIgnored public var ropeTheta: Float = 10_000 + @CodingIgnored public var ropeTraditional: Bool = false + @CodingIgnored public var numGqaGroups: Int = 4 + @CodingIgnored public var qkvMultiplier: [Float] = [0.5, 1.0] + + public mutating func didDecode(from decoder: any Decoder) throws { let qkvMultipliers = stride( from: qkvMultiplier[0], through: qkvMultiplier[1], by: (qkvMultiplier[1] - qkvMultiplier[0]) / Float(numTransformerLayers - 1) @@ -281,16 +250,6 @@ public struct OpenElmConfiguration: Codable, Sendable { by: (ffnMultipliers[1] - ffnMultipliers[0]) / Float(numTransformerLayers - 1) ) .map { round($0 * 100) / 100 } - - self.ffnWithGlu = - try container.decodeIfPresent( - Bool.self, forKey: OpenElmConfiguration.CodingKeys.ffnWithGlu) ?? true - self.normalizeQkProjections = - try container.decodeIfPresent( - Bool.self, forKey: OpenElmConfiguration.CodingKeys.normalizeQkProjections) ?? true - self.shareInputOutputLayers = - try container.decodeIfPresent( - Bool.self, forKey: OpenElmConfiguration.CodingKeys.shareInputOutputLayers) ?? 
true } } diff --git a/Libraries/MLXLLM/Models/Phi.swift b/Libraries/MLXLLM/Models/Phi.swift index a8aa8d97..43a8b897 100644 --- a/Libraries/MLXLLM/Models/Phi.swift +++ b/Libraries/MLXLLM/Models/Phi.swift @@ -4,8 +4,9 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable -// https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/phi.py +// https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/phi.py private class PhiAttention: Module { @@ -175,59 +176,18 @@ public class PhiModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct PhiConfiguration: Codable, Sendable { - var maxPositionalEmbeddings = 2048 - var vocabularySize = 51200 - var hiddenSize = 2560 - var attentionHeads = 32 - var hiddenLayers = 32 - var kvHeads = 32 - var partialRotaryFactor: Float = 0.4 - var intermediateSize = 10240 - var layerNormEps: Float = 1e-5 - var ropeTheta: Float = 10_000 - - enum CodingKeys: String, CodingKey { - case maxPositionalEmbeddings = "max_position_embeddings" - case vocabularySize = "vocab_size" - case hiddenSize = "hidden_size" - case attentionHeads = "num_attention_heads" - case hiddenLayers = "num_hidden_layers" - case kvHeads = "num_key_value_heads" - case partialRotaryFactor = "partial_rotary_factor" - case intermediateSize = "intermediate_size" - case layerNormEps = "layer_norm_eps" - case ropeTheta = "rope_theta" - } - - public init(from decoder: Decoder) throws { - let container: KeyedDecodingContainer = try decoder.container( - keyedBy: PhiConfiguration.CodingKeys.self) - - self.maxPositionalEmbeddings = try container.decode( - Int.self, forKey: PhiConfiguration.CodingKeys.maxPositionalEmbeddings) - self.vocabularySize = try container.decode( - Int.self, forKey: PhiConfiguration.CodingKeys.vocabularySize) - self.hiddenSize = try container.decode( - Int.self, forKey: PhiConfiguration.CodingKeys.hiddenSize) - self.attentionHeads = try container.decode( - Int.self, forKey: PhiConfiguration.CodingKeys.attentionHeads) - self.hiddenLayers = try container.decode( - Int.self, forKey: PhiConfiguration.CodingKeys.hiddenLayers) - self.kvHeads = - try container.decodeIfPresent(Int.self, forKey: PhiConfiguration.CodingKeys.kvHeads) - ?? attentionHeads - self.partialRotaryFactor = try container.decode( - Float.self, forKey: PhiConfiguration.CodingKeys.partialRotaryFactor) - self.intermediateSize = try container.decode( - Int.self, forKey: PhiConfiguration.CodingKeys.intermediateSize) - self.layerNormEps = try container.decode( - Float.self, forKey: PhiConfiguration.CodingKeys.layerNormEps) - self.ropeTheta = - try container.decodeIfPresent(Float.self, forKey: PhiConfiguration.CodingKeys.ropeTheta) - ?? 
10_000 - - } +@Codable +public struct PhiConfiguration: Sendable { + @CodingKey("max_position_embeddings") public var maxPositionalEmbeddings = 2048 + @CodingKey("vocab_size") public var vocabularySize = 51200 + @CodingKey("hidden_size") public var hiddenSize = 2560 + @CodingKey("num_attention_heads") public var attentionHeads = 32 + @CodingKey("num_hidden_layers") public var hiddenLayers = 32 + @CodingKey("num_key_value_heads") public var kvHeads = 32 + @CodingKey("partial_rotary_factor") public var partialRotaryFactor: Float = 0.4 + @CodingKey("intermediate_size") public var intermediateSize = 10240 + @CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-5 + @CodingKey("rope_theta") public var ropeTheta: Float = 10_000 } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Phi3.swift b/Libraries/MLXLLM/Models/Phi3.swift index b5b153b9..b0f04c56 100644 --- a/Libraries/MLXLLM/Models/Phi3.swift +++ b/Libraries/MLXLLM/Models/Phi3.swift @@ -4,6 +4,7 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable private class Attention: Module { @@ -225,13 +226,14 @@ public class Phi3Model: Module, LLMModel, KVCacheDimensionProvider { } } -struct RopeScalingWithFactorArrays: Codable { - let longFactor: [Float]? - let shortFactor: [Float]? - let factor: Float? - let type: String? - let longMScale: Float? - let shortMScale: Float? +@Codable +public struct RopeScalingWithFactorArrays: Sendable { + @CodingKey("long_factor") public var longFactor: [Float]? + @CodingKey("short_factor") public var shortFactor: [Float]? + @CodingKey("long_mscale") public var longMScale: Float? + @CodingKey("short_mscale") public var shortMScale: Float? + public var factor: Float? + public var type: String? enum CodingKeys: String, CodingKey { case type @@ -243,74 +245,22 @@ struct RopeScalingWithFactorArrays: Codable { } } -public struct Phi3Configuration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var ropeTheta: Float = 10_000 - var ropeTraditional: Bool = false - var ropeScaling: RopeScalingWithFactorArrays? 
- var partialRotaryFactor: Float = 1.0 - var maxPositionEmbeddings: Int - var originalMaxPositionEmbeddings: Int - var tieWordEmbeddings: Bool = false - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case ropeTheta = "rope_theta" - case ropeTraditional = "rope_traditional" - case ropeScaling = "rope_scaling" - case partialRotaryFactor = "partial_rotary_factor" - case maxPositionEmbeddings = "max_position_embeddings" - case originalMaxPositionEmbeddings = "original_max_position_embeddings" - case tieWordEmbeddings = "tie_word_embeddings" - } - - public init(from decoder: Decoder) throws { - // custom implementation to handle optional keys with required values - let container: KeyedDecodingContainer = try decoder.container( - keyedBy: Phi3Configuration.CodingKeys.self) - - hiddenSize = try container.decode(Int.self, forKey: Phi3Configuration.CodingKeys.hiddenSize) - hiddenLayers = try container.decode( - Int.self, forKey: Phi3Configuration.CodingKeys.hiddenLayers) - intermediateSize = try container.decode( - Int.self, forKey: Phi3Configuration.CodingKeys.intermediateSize) - attentionHeads = try container.decode( - Int.self, forKey: Phi3Configuration.CodingKeys.attentionHeads) - rmsNormEps = try container.decode( - Float.self, forKey: Phi3Configuration.CodingKeys.rmsNormEps) - vocabularySize = try container.decode( - Int.self, forKey: Phi3Configuration.CodingKeys.vocabularySize) - kvHeads = try container.decode(Int.self, forKey: Phi3Configuration.CodingKeys.kvHeads) - ropeTheta = - try container.decodeIfPresent( - Float.self, forKey: Phi3Configuration.CodingKeys.ropeTheta) ?? 10_000 - ropeTraditional = - try container.decodeIfPresent( - Bool.self, forKey: Phi3Configuration.CodingKeys.ropeTraditional) ?? false - ropeScaling = try container.decodeIfPresent( - RopeScalingWithFactorArrays.self, forKey: .ropeScaling) - partialRotaryFactor = - try container.decodeIfPresent( - Float.self, forKey: .partialRotaryFactor) ?? 1.0 - maxPositionEmbeddings = try container.decode(Int.self, forKey: .maxPositionEmbeddings) - originalMaxPositionEmbeddings = try container.decode( - Int.self, forKey: .originalMaxPositionEmbeddings) - tieWordEmbeddings = - try container.decodeIfPresent( - Bool.self, forKey: .tieWordEmbeddings) ?? false - } +@Codable +public struct Phi3Configuration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("rope_theta") public var ropeTheta: Float = 10_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: RopeScalingWithFactorArrays? 
+ @CodingKey("partial_rotary_factor") public var partialRotaryFactor: Float = 1.0 + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int + @CodingKey("original_max_position_embeddings") public var originalMaxPositionEmbeddings: Int + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = false } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/PhiMoE.swift b/Libraries/MLXLLM/Models/PhiMoE.swift index 2330f3b7..98dbb47f 100644 --- a/Libraries/MLXLLM/Models/PhiMoE.swift +++ b/Libraries/MLXLLM/Models/PhiMoE.swift @@ -2,41 +2,26 @@ import Foundation import MLX import MLXLMCommon import MLXNN - -// Port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/phimoe.py - -public struct PhiMoEConfiguration: Codable, Sendable { - var modelType: String = "phimoe" - var vocabularySize: Int = 32064 - var hiddenSize: Int = 4096 - var intermediateSize: Int = 6400 - var hiddenLayers: Int = 32 - var attentionHeads: Int = 32 - var kvHeads: Int = 8 - var maxPositionEmbeddings: Int = 131072 - var originalMaxPositionEmbeddings: Int = 4096 - var rmsNormEps: Float = 1e-6 - var ropeScaling: RopeScalingWithFactorArrays? - var numLocalExperts: Int = 16 - var numExpertsPerToken: Int = 2 - var ropeTheta: Float = 10000.0 - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case vocabularySize = "vocab_size" - case hiddenSize = "hidden_size" - case intermediateSize = "intermediate_size" - case hiddenLayers = "num_hidden_layers" - case attentionHeads = "num_attention_heads" - case kvHeads = "num_key_value_heads" - case maxPositionEmbeddings = "max_position_embeddings" - case originalMaxPositionEmbeddings = "original_max_position_embeddings" - case rmsNormEps = "rms_norm_eps" - case ropeScaling = "rope_scaling" - case numLocalExperts = "num_local_experts" - case numExpertsPerToken = "num_experts_per_tok" - case ropeTheta = "rope_theta" - } +import ReerCodable + +// Port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/phimoe.py + +@Codable +public struct PhiMoEConfiguration: Sendable { + @CodingKey("vocab_size") public var vocabularySize: Int = 32064 + @CodingKey("hidden_size") public var hiddenSize: Int = 4096 + @CodingKey("intermediate_size") public var intermediateSize: Int = 6400 + @CodingKey("num_hidden_layers") public var hiddenLayers: Int = 32 + @CodingKey("num_attention_heads") public var attentionHeads: Int = 32 + @CodingKey("num_key_value_heads") public var kvHeads: Int = 8 + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 131072 + @CodingKey("original_max_position_embeddings") public var originalMaxPositionEmbeddings: Int = + 4096 + @CodingKey("rms_norm_eps") public var rmsNormEps: Float = 1e-6 + @CodingKey("rope_scaling") public var ropeScaling: RopeScalingWithFactorArrays? 
+ @CodingKey("num_local_experts") public var numLocalExperts: Int = 16 + @CodingKey("num_experts_per_tok") public var numExpertsPerToken: Int = 2 + @CodingKey("rope_theta") public var ropeTheta: Float = 10000.0 } private class Attention: Module { diff --git a/Libraries/MLXLLM/Models/Qwen2.swift b/Libraries/MLXLLM/Models/Qwen2.swift index d2f64432..afbe4758 100644 --- a/Libraries/MLXLLM/Models/Qwen2.swift +++ b/Libraries/MLXLLM/Models/Qwen2.swift @@ -9,8 +9,9 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable -// port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/qwen2.py +// port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/qwen2.py private class Attention: Module { let args: Qwen2Configuration @@ -208,64 +209,19 @@ public class Qwen2Model: Module, LLMModel, KVCacheDimensionProvider { } } -public struct Qwen2Configuration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var ropeTheta: Float = 1_000_000 - var ropeTraditional: Bool = false - var ropeScaling: [String: StringOrNumber]? = nil - var tieWordEmbeddings = false - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case ropeTheta = "rope_theta" - case ropeTraditional = "rope_traditional" - case ropeScaling = "rope_scaling" - case tieWordEmbeddings = "tie_word_embeddings" - } - - public init(from decoder: Decoder) throws { - // custom implementation to handle optional keys with required values - let container: KeyedDecodingContainer = - try decoder.container( - keyedBy: Qwen2Configuration.CodingKeys.self) - - self.hiddenSize = try container.decode( - Int.self, forKey: Qwen2Configuration.CodingKeys.hiddenSize) - self.hiddenLayers = try container.decode( - Int.self, forKey: Qwen2Configuration.CodingKeys.hiddenLayers) - self.intermediateSize = try container.decode( - Int.self, forKey: Qwen2Configuration.CodingKeys.intermediateSize) - self.attentionHeads = try container.decode( - Int.self, forKey: Qwen2Configuration.CodingKeys.attentionHeads) - self.rmsNormEps = try container.decode( - Float.self, forKey: Qwen2Configuration.CodingKeys.rmsNormEps) - self.vocabularySize = try container.decode( - Int.self, forKey: Qwen2Configuration.CodingKeys.vocabularySize) - self.kvHeads = try container.decode(Int.self, forKey: Qwen2Configuration.CodingKeys.kvHeads) - self.ropeTheta = - try container.decodeIfPresent( - Float.self, forKey: Qwen2Configuration.CodingKeys.ropeTheta) - ?? 1_000_000 - self.ropeTraditional = - try container.decodeIfPresent( - Bool.self, forKey: Qwen2Configuration.CodingKeys.ropeTraditional) ?? false - self.ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: Qwen2Configuration.CodingKeys.ropeScaling) - self.tieWordEmbeddings = - try container.decodeIfPresent(Bool.self, forKey: .tieWordEmbeddings) ?? 
false - } +@Codable +public struct Qwen2Configuration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("rope_theta") public var ropeTheta: Float = 1_000_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? = nil + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings = false } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Qwen3.swift b/Libraries/MLXLLM/Models/Qwen3.swift index 6e9e8bb9..e65fb844 100644 --- a/Libraries/MLXLLM/Models/Qwen3.swift +++ b/Libraries/MLXLLM/Models/Qwen3.swift @@ -9,6 +9,7 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable // port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/qwen3.py @@ -217,67 +218,20 @@ public class Qwen3Model: Module, LLMModel, KVCacheDimensionProvider { } } -public struct Qwen3Configuration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var ropeTheta: Float = 1_000_000 - var headDim: Int - var ropeScaling: [String: StringOrNumber]? = nil - var tieWordEmbeddings = false - var maxPositionEmbeddings: Int = 32768 - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case ropeTheta = "rope_theta" - case headDim = "head_dim" - case ropeScaling = "rope_scaling" - case tieWordEmbeddings = "tie_word_embeddings" - case maxPositionEmbeddings = "max_position_embeddings" - } - - public init(from decoder: Decoder) throws { - // custom implementation to handle optional keys with required values - let container: KeyedDecodingContainer = - try decoder.container( - keyedBy: Qwen3Configuration.CodingKeys.self) - - self.hiddenSize = try container.decode( - Int.self, forKey: Qwen3Configuration.CodingKeys.hiddenSize) - self.hiddenLayers = try container.decode( - Int.self, forKey: Qwen3Configuration.CodingKeys.hiddenLayers) - self.intermediateSize = try container.decode( - Int.self, forKey: Qwen3Configuration.CodingKeys.intermediateSize) - self.attentionHeads = try container.decode( - Int.self, forKey: Qwen3Configuration.CodingKeys.attentionHeads) - self.rmsNormEps = try container.decode( - Float.self, forKey: Qwen3Configuration.CodingKeys.rmsNormEps) - self.vocabularySize = try container.decode( - Int.self, forKey: Qwen3Configuration.CodingKeys.vocabularySize) - self.kvHeads = try container.decode(Int.self, forKey: Qwen3Configuration.CodingKeys.kvHeads) - self.ropeTheta = - try container.decodeIfPresent( - Float.self, forKey: Qwen3Configuration.CodingKeys.ropeTheta) - ?? 
1_000_000 - self.headDim = try container.decode( - Int.self, forKey: Qwen3Configuration.CodingKeys.headDim) - self.ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: Qwen3Configuration.CodingKeys.ropeScaling) - self.tieWordEmbeddings = - try container.decodeIfPresent(Bool.self, forKey: .tieWordEmbeddings) ?? false - self.maxPositionEmbeddings = - try container.decodeIfPresent(Int.self, forKey: .maxPositionEmbeddings) ?? 32768 - } +@Codable +public struct Qwen3Configuration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("rope_theta") public var ropeTheta: Float = 1_000_000 + @CodingKey("head_dim") public var headDim: Int + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? = nil + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings = false + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 32768 } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Qwen3MoE.swift b/Libraries/MLXLLM/Models/Qwen3MoE.swift index 301b2c9f..a15b2184 100644 --- a/Libraries/MLXLLM/Models/Qwen3MoE.swift +++ b/Libraries/MLXLLM/Models/Qwen3MoE.swift @@ -9,6 +9,7 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable // port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/qwen3_moe.py @@ -278,76 +279,26 @@ public class Qwen3MoEModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct Qwen3MoEConfiguration: Codable, Sendable { - var modelType: String = "qwen3_moe" - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var numExperts: Int - var numExpertsPerToken: Int - var decoderSparseStep: Int - var mlpOnlyLayers: [Int] - var moeIntermediateSize: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var headDim: Int - var ropeTheta: Float = 1_000_000 - var tieWordEmbeddings: Bool = false - var maxPositionEmbeddings: Int = 32768 - var normTopkProb: Bool = false - var ropeScaling: [String: StringOrNumber]? = nil - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case numExperts = "num_experts" - case numExpertsPerToken = "num_experts_per_tok" - case decoderSparseStep = "decoder_sparse_step" - case mlpOnlyLayers = "mlp_only_layers" - case moeIntermediateSize = "moe_intermediate_size" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case headDim = "head_dim" - case ropeTheta = "rope_theta" - case tieWordEmbeddings = "tie_word_embeddings" - case maxPositionEmbeddings = "max_position_embeddings" - case normTopkProb = "norm_topk_prob" - case ropeScaling = "rope_scaling" - } - - public init(from decoder: Decoder) throws { - let container = try decoder.container(keyedBy: CodingKeys.self) - - self.modelType = - try container.decodeIfPresent(String.self, forKey: .modelType) ?? 
"qwen3_moe" - self.hiddenSize = try container.decode(Int.self, forKey: .hiddenSize) - self.hiddenLayers = try container.decode(Int.self, forKey: .hiddenLayers) - self.intermediateSize = try container.decode(Int.self, forKey: .intermediateSize) - self.attentionHeads = try container.decode(Int.self, forKey: .attentionHeads) - self.numExperts = try container.decode(Int.self, forKey: .numExperts) - self.numExpertsPerToken = try container.decode(Int.self, forKey: .numExpertsPerToken) - self.decoderSparseStep = try container.decode(Int.self, forKey: .decoderSparseStep) - self.mlpOnlyLayers = try container.decode([Int].self, forKey: .mlpOnlyLayers) - self.moeIntermediateSize = try container.decode(Int.self, forKey: .moeIntermediateSize) - self.rmsNormEps = try container.decode(Float.self, forKey: .rmsNormEps) - self.vocabularySize = try container.decode(Int.self, forKey: .vocabularySize) - self.kvHeads = try container.decode(Int.self, forKey: .kvHeads) - self.headDim = try container.decode(Int.self, forKey: .headDim) - self.ropeTheta = try container.decodeIfPresent(Float.self, forKey: .ropeTheta) ?? 1_000_000 - self.tieWordEmbeddings = - try container.decodeIfPresent(Bool.self, forKey: .tieWordEmbeddings) ?? false - self.maxPositionEmbeddings = - try container.decodeIfPresent(Int.self, forKey: .maxPositionEmbeddings) ?? 32768 - self.normTopkProb = try container.decodeIfPresent(Bool.self, forKey: .normTopkProb) ?? false - self.ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: .ropeScaling) - } +@Codable +public struct Qwen3MoEConfiguration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("num_experts") public var numExperts: Int + @CodingKey("num_experts_per_tok") public var numExpertsPerToken: Int + @CodingKey("decoder_sparse_step") public var decoderSparseStep: Int + @CodingKey("mlp_only_layers") public var mlpOnlyLayers: [Int] + @CodingKey("moe_intermediate_size") public var moeIntermediateSize: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("head_dim") public var headDim: Int + @CodingKey("rope_theta") public var ropeTheta: Float = 1_000_000 + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = false + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 32768 + @CodingKey("norm_topk_prob") public var normTopkProb: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? 
= nil } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Starcoder2.swift b/Libraries/MLXLLM/Models/Starcoder2.swift index 9dcc09af..a48e9923 100644 --- a/Libraries/MLXLLM/Models/Starcoder2.swift +++ b/Libraries/MLXLLM/Models/Starcoder2.swift @@ -9,8 +9,9 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable -// port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/starcoder2.py +// port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/starcoder2.py private class Attention: Module { let args: Starcoder2Configuration @@ -178,70 +179,19 @@ public class Starcoder2Model: Module, LLMModel, KVCacheDimensionProvider { } } -public struct Starcoder2Configuration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var kvHeads: Int - var maxPositionEmbeddings: Int = 16384 - var normEpsilon: Float = 1e-5 - var normType: String = "layer_norm" - var vocabularySize: Int = 49152 - var ropeTheta: Float = 100000 - var tieWordEmbeddings: Bool = true - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case kvHeads = "num_key_value_heads" - case maxPositionEmbeddings = "max_position_embeddings" - case normEpsilon = "norm_epsilon" - case normType = "norm_type" - case vocabularySize = "vocab_size" - case ropeTheta = "rope_theta" - case tieWordEmbeddings = "tie_word_embeddings" - } - - public init(from decoder: Decoder) throws { - // custom implementation to handle optional keys with required values - let container: KeyedDecodingContainer = - try decoder.container( - keyedBy: Starcoder2Configuration.CodingKeys.self) - - self.hiddenSize = try container.decode( - Int.self, forKey: Starcoder2Configuration.CodingKeys.hiddenSize) - self.hiddenLayers = try container.decode( - Int.self, forKey: Starcoder2Configuration.CodingKeys.hiddenLayers) - self.intermediateSize = try container.decode( - Int.self, forKey: Starcoder2Configuration.CodingKeys.intermediateSize) - self.attentionHeads = try container.decode( - Int.self, forKey: Starcoder2Configuration.CodingKeys.attentionHeads) - self.kvHeads = try container.decode( - Int.self, forKey: Starcoder2Configuration.CodingKeys.kvHeads) - self.maxPositionEmbeddings = - try container.decodeIfPresent( - Int.self, forKey: Starcoder2Configuration.CodingKeys.maxPositionEmbeddings) ?? 16384 - self.normEpsilon = - try container.decodeIfPresent( - Float.self, forKey: Starcoder2Configuration.CodingKeys.normEpsilon) ?? 1e-5 - self.normType = - try container.decodeIfPresent( - String.self, forKey: Starcoder2Configuration.CodingKeys.normType) ?? "layer_norm" - self.vocabularySize = - try container.decodeIfPresent( - Int.self, forKey: Starcoder2Configuration.CodingKeys.vocabularySize) ?? 49152 - self.ropeTheta = - try container.decodeIfPresent( - Float.self, forKey: Starcoder2Configuration.CodingKeys.ropeTheta) - ?? 100000 - self.tieWordEmbeddings = - try container.decodeIfPresent( - Bool.self, forKey: Starcoder2Configuration.CodingKeys.tieWordEmbeddings) - ?? 
true - } +@Codable +public struct Starcoder2Configuration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 16384 + @CodingKey("norm_epsilon") public var normEpsilon: Float = 1e-5 + @CodingKey("norm_type") public var normType: String = "layer_norm" + @CodingKey("vocab_size") public var vocabularySize: Int = 49152 + @CodingKey("rope_theta") public var ropeTheta: Float = 100000 + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = true } // MARK: - LoRA diff --git a/Libraries/MLXLLM/README.md b/Libraries/MLXLLM/README.md index 16540fe6..fbb71984 100644 --- a/Libraries/MLXLLM/README.md +++ b/Libraries/MLXLLM/README.md @@ -11,7 +11,7 @@ This is a port of several models from: -- https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/ +- https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/ using the Hugging Face swift transformers package to provide tokenization: diff --git a/Libraries/MLXLLM/SwitchLayers.swift b/Libraries/MLXLLM/SwitchLayers.swift index c74b5987..d431bfe9 100644 --- a/Libraries/MLXLLM/SwitchLayers.swift +++ b/Libraries/MLXLLM/SwitchLayers.swift @@ -2,7 +2,7 @@ import Foundation import MLX import MLXNN -// Port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/switch_layers.py +// Port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/switch_layers.py private func gatherSort(x: MLXArray, indices: MLXArray) -> (MLXArray, MLXArray, MLXArray) { let m = indices.dim(-1) diff --git a/Libraries/MLXLMCommon/BaseConfiguration.swift b/Libraries/MLXLMCommon/BaseConfiguration.swift index d90784ec..80289d28 100644 --- a/Libraries/MLXLMCommon/BaseConfiguration.swift +++ b/Libraries/MLXLMCommon/BaseConfiguration.swift @@ -1,30 +1,23 @@ // Copyright © 2025 Apple Inc. import Foundation +import ReerCodable /// Base ``LanguageModel`` configuration -- provides `modelType` /// and `quantization` (used in loading the model). /// /// This is used by ``ModelFactory/load(hub:configuration:progressHandler:)`` /// to determine the type of model to load. -public struct BaseConfiguration: Codable, Sendable { - public let modelType: String +@Codable(memberwiseInit: false) +public struct BaseConfiguration: Sendable { + @CodingKey("model_type") public let modelType: String - public struct Quantization: Codable, Sendable, Equatable { - public init(groupSize: Int, bits: Int) { - self.groupSize = groupSize - self.bits = bits - } - - public let groupSize: Int + @Codable + public struct Quantization: Sendable, Equatable { + @CodingKey("group_size") public let groupSize: Int public let bits: Int public var asTuple: (Int, Int) { (groupSize, bits) } - - enum CodingKeys: String, CodingKey { - case groupSize = "group_size" - case bits = "bits" - } } /// handling instructions for ``PerLayerQuantization`` @@ -77,36 +70,25 @@ public struct BaseConfiguration: Codable, Sendable { /// /// This mixed type structure requires manual decoding. struct QuantizationContainer: Codable, Sendable { - var quantization: Quantization + var quantization: Quantization? 
var perLayerQuantization: PerLayerQuantization - // based on Dictionary's coding key - internal struct _DictionaryCodingKey: CodingKey { - internal let stringValue: String - internal let intValue: Int? - - internal init(stringValue: String) { - self.stringValue = stringValue - self.intValue = Int(stringValue) - } - - internal init(intValue: Int) { - self.stringValue = "\(intValue)" - self.intValue = intValue - } + internal init(quantization: Quantization?, perLayerQuantization: PerLayerQuantization) { + self.quantization = quantization + self.perLayerQuantization = perLayerQuantization } init(from decoder: any Decoder) throws { // handle the embedded Quantization - self.quantization = try Quantization(from: decoder) + self.quantization = try? Quantization(from: decoder) // and the interleaved per-layer values var perLayerQuantization = [String: QuantizationOption]() - let container = try decoder.container(keyedBy: _DictionaryCodingKey.self) + let container = try decoder.container(keyedBy: AnyCodingKey.self) for key in container.allKeys { switch key.stringValue { - case Quantization.CodingKeys.groupSize.rawValue: continue - case Quantization.CodingKeys.bits.rawValue: continue + case "group_size": continue + case "bits": continue default: if let f = try? container.decode(Bool.self, forKey: key) { @@ -126,19 +108,20 @@ public struct BaseConfiguration: Codable, Sendable { func encode(to encoder: any Encoder) throws { try quantization.encode(to: encoder) - var container = encoder.container(keyedBy: _DictionaryCodingKey.self) + var container = encoder.container(keyedBy: AnyCodingKey.self) for (key, value) in perLayerQuantization.perLayerQuantization { + guard let key = AnyCodingKey(stringValue: key) else { continue } switch value { case .skip: - try container.encode(false, forKey: .init(stringValue: key)) + try container.encode(false, forKey: key) case .quantize(let q): - try container.encode(q, forKey: .init(stringValue: key)) + try container.encode(q, forKey: key) } } } } - var quantizationContainer: QuantizationContainer? + @CodingKey("quantization") var quantizationContainer: QuantizationContainer? @available(*, deprecated, message: "Please use perLayerQuantization instead") public var quantization: Quantization? { @@ -149,8 +132,13 @@ public struct BaseConfiguration: Codable, Sendable { quantizationContainer?.perLayerQuantization } - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case quantizationContainer = "quantization" + public init( + modelType: String, quantization: Quantization? = nil, + perLayerQuantization: PerLayerQuantization? = nil + ) { + self.modelType = modelType + self.quantizationContainer = QuantizationContainer( + quantization: quantization, + perLayerQuantization: perLayerQuantization ?? .init(perLayerQuantization: [:])) } } diff --git a/Libraries/MLXVLM/Codable+Support.swift b/Libraries/MLXVLM/Codable+Support.swift new file mode 100644 index 00000000..84329926 --- /dev/null +++ b/Libraries/MLXVLM/Codable+Support.swift @@ -0,0 +1,5 @@ +import Foundation + +/// `swift-transformers` also declares a public `Decoder` and it conflicts with the `Codable` +/// implementations. 
+public typealias Decoder = Swift.Decoder diff --git a/Libraries/MLXVLM/Models/Idefics3.swift b/Libraries/MLXVLM/Models/Idefics3.swift index 17b1c5f4..642ac866 100644 --- a/Libraries/MLXVLM/Models/Idefics3.swift +++ b/Libraries/MLXVLM/Models/Idefics3.swift @@ -11,110 +11,50 @@ import Hub import MLX import MLXLMCommon import MLXNN +import ReerCodable import Tokenizers // MARK: - Configuration -public struct Idefics3Configuration: Codable, Sendable { - - public struct TextConfiguration: Codable, Sendable { - public let modelType: String - public let hiddenSize: Int - public var numHiddenLayers: Int { _numHiddenLayers ?? 32 } - public let intermediateSize: Int - public let numAttentionHeads: Int - public let rmsNormEps: Float - public let vocabSize: Int - public let numKeyValueHeads: Int - public let ropeTheta: Float - public var ropeTraditional: Bool { _ropeTraditional ?? false } - public var tieWordEmbeddings: Bool { _tieWordEmbeddings ?? false } - - private let _numHiddenLayers: Int? - private let _ropeTraditional: Bool? - private let _tieWordEmbeddings: Bool? - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case hiddenSize = "hidden_size" - case _numHiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case numAttentionHeads = "num_attention_heads" - case rmsNormEps = "rms_norm_eps" - case vocabSize = "vocab_size" - case numKeyValueHeads = "num_key_value_heads" - case ropeTheta = "rope_theta" - case _ropeTraditional = "rope_traditional" - case _tieWordEmbeddings = "tie_word_embeddings" - } - } - - public struct VisionConfiguration: Codable, Sendable { - public let modelType: String - public var numHiddenLayers: Int { _numHiddenLayers ?? 12 } - public let hiddenSize: Int - public var intermediateSize: Int { _intermediateSize ?? 3072 } - public let numAttentionHeads: Int - public let patchSize: Int - public let imageSize: Int - public var numChannels: Int { _numChannels ?? 3 } - public var layerNormEps: Float { _layerNormEps ?? 1e-6 } - - private let _numHiddenLayers: Int? - private let _intermediateSize: Int? - private let _numChannels: Int? - private let _layerNormEps: Float? 
- - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case _numHiddenLayers = "num_hidden_layers" - case hiddenSize = "hidden_size" - case _intermediateSize = "intermediate_size" - case numAttentionHeads = "num_attention_heads" - case patchSize = "patch_size" - case imageSize = "image_size" - case _numChannels = "num_channels" - case _layerNormEps = "layer_norm_eps" - } - } - - public let textConfig: TextConfiguration - public let visionConfig: VisionConfiguration - public let modelType: String - public let ignoreIndex: Int - public let vocabSize: Int - public let scaleFactor: Int - public let imageTokenId: Int - public let imageTokenIndex: Int - - enum CodingKeys: String, CodingKey { - case textConfig = "text_config" - case visionConfig = "vision_config" - case modelType = "model_type" - case ignoreIndex = "ignore_index" - case vocabSize = "vocab_size" - case scaleFactor = "scale_factor" - case imageTokenId = "image_token_id" - case imageTokenIndex = "image_token_index" - } - - public init(from decoder: any Swift.Decoder) throws { - let container = try decoder.container(keyedBy: CodingKeys.self) - - self.textConfig = - try container - .decode(TextConfiguration.self, forKey: .textConfig) - self.visionConfig = - try container - .decode(VisionConfiguration.self, forKey: .visionConfig) - self.modelType = try container.decode(String.self, forKey: .modelType) - self.ignoreIndex = (try? container.decode(Int.self, forKey: .ignoreIndex)) ?? -100 - self.vocabSize = (try? container.decode(Int.self, forKey: .vocabSize)) ?? 128259 - self.scaleFactor = (try? container.decode(Int.self, forKey: .scaleFactor)) ?? 2 - self.imageTokenId = (try? container.decode(Int.self, forKey: .imageTokenId)) ?? 49153 - self.imageTokenIndex = - (try? container.decode(Int.self, forKey: .imageTokenIndex)) ?? 
self.imageTokenId
-    }
+@Codable
+public struct Idefics3Configuration: Sendable {
+
+    @Codable
+    public struct TextConfiguration: Sendable {
+        @CodingKey("model_type") public var modelType: String
+        @CodingKey("hidden_size") public var hiddenSize: Int
+        @CodingKey("num_hidden_layers") public var numHiddenLayers: Int = 32
+        @CodingKey("intermediate_size") public var intermediateSize: Int
+        @CodingKey("num_attention_heads") public var numAttentionHeads: Int
+        @CodingKey("rms_norm_eps") public var rmsNormEps: Float
+        @CodingKey("vocab_size") public var vocabSize: Int
+        @CodingKey("num_key_value_heads") public var numKeyValueHeads: Int
+        @CodingKey("rope_theta") public var ropeTheta: Float
+        @CodingKey("rope_traditional") public var ropeTraditional: Bool = false
+        @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = false
+    }
+
+    @Codable
+    public struct VisionConfiguration: Sendable {
+        @CodingKey("model_type") public var modelType: String
+        @CodingKey("num_hidden_layers") public var numHiddenLayers: Int = 12
+        @CodingKey("hidden_size") public var hiddenSize: Int
+        @CodingKey("intermediate_size") public var intermediateSize: Int = 3072
+        @CodingKey("num_attention_heads") public var numAttentionHeads: Int
+        @CodingKey("patch_size") public var patchSize: Int
+        @CodingKey("image_size") public var imageSize: Int
+        @CodingKey("num_channels") public var numChannels: Int = 3
+        @CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-6
+    }
+
+    @CodingKey("text_config") public var textConfig: TextConfiguration
+    @CodingKey("vision_config") public var visionConfig: VisionConfiguration
+    @CodingKey("model_type") public var modelType: String
+    @CodingKey("ignore_index") public var ignoreIndex: Int = -100
+    @CodingKey("vocab_size") public var vocabSize: Int = 128259
+    @CodingKey("scale_factor") public var scaleFactor: Int = 2
+    @CodingKey("image_token_id") public var imageTokenId: Int = 49153
+    @CodingKey("image_token_index", "image_token_id") public var imageTokenIndex: Int
 }
 
 // MARK: - Connector
@@ -772,18 +712,18 @@ public class Idefics3: Module, VLMModel, KVCacheDimensionProvider {
 }
 
 // MARK: - Processor Configuration
-public struct Idefics3ProcessorConfiguration: Codable, Sendable {
-    public struct Size: Codable, Sendable {
-        public let longestEdge: Int
-        enum CodingKeys: String, CodingKey {
-            case longestEdge = "longest_edge"
-        }
+@Codable
+public struct Idefics3ProcessorConfiguration: Sendable {
+
+    @Codable
+    public struct Size: Sendable {
+        @CodingKey("longest_edge") public var longestEdge: Int
     }
 
-    public let imageMean: [CGFloat]
-    public let imageStd: [CGFloat]
-    public let size: Size
-    public let imageSequenceLength: Int?
+    @CodingKey("image_mean") public var imageMean: [CGFloat]
+    @CodingKey("image_std") public var imageStd: [CGFloat]
+    public var size: Size
+    @CodingKey("image_seq_len") public var imageSequenceLength: Int?
public var imageMeanTuple: (CGFloat, CGFloat, CGFloat) { (imageMean[0], imageMean[1], imageMean[2]) @@ -791,13 +731,6 @@ public struct Idefics3ProcessorConfiguration: Codable, Sendable { public var imageStdTuple: (CGFloat, CGFloat, CGFloat) { (imageStd[0], imageStd[1], imageStd[2]) } - - enum CodingKeys: String, CodingKey { - case imageMean = "image_mean" - case imageStd = "image_std" - case size - case imageSequenceLength = "image_seq_len" - } } // MARK: - Processor diff --git a/Libraries/MLXVLM/Models/Paligemma.swift b/Libraries/MLXVLM/Models/Paligemma.swift index 3cd3c5de..89e551f5 100644 --- a/Libraries/MLXVLM/Models/Paligemma.swift +++ b/Libraries/MLXVLM/Models/Paligemma.swift @@ -8,6 +8,7 @@ import Hub import MLX import MLXLMCommon import MLXNN +import ReerCodable import Tokenizers // MARK: - Language @@ -618,100 +619,63 @@ public class PaliGemma: Module, VLMModel, KVCacheDimensionProvider { // MARK: - Configuration /// Confguration for ``PaliGemma`` -public struct PaliGemmaConfiguration: Codable, Sendable { - - public struct TextConfiguration: Codable, Sendable { - public let modelType: String - public let hiddenSize: Int - public let hiddenLayers: Int - public let intermediateSize: Int - public let attentionHeads: Int - public let kvHeads: Int - public let vocabularySize: Int - private let _rmsNormEps: Float? - public var rmsNormEps: Float { _rmsNormEps ?? 1e-6 } - private let _ropeTheta: Float? - public var ropeTheta: Float { _ropeTheta ?? 10_000 } - private let _ropeTraditional: Bool? - public var ropeTraditional: Bool { _ropeTraditional ?? false } - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case kvHeads = "num_key_value_heads" - case vocabularySize = "vocab_size" - case _rmsNormEps = "rms_norm_eps" - case _ropeTheta = "rope_theta" - case _ropeTraditional = "rope_traditional" - } - } - - public struct VisionConfiguration: Codable, Sendable { - public let modelType: String - public let hiddenSize: Int - public let hiddenLayers: Int - public let intermediateSize: Int - public let attentionHeads: Int - public let patchSize: Int - public let projectionDimensions: Int - public let imageSize: Int - private let _channels: Int? - public var channels: Int { _channels ?? 3 } - private let _layerNormEps: Float? - public var layerNormEps: Float { _layerNormEps ?? 
1e-6 } - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case patchSize = "patch_size" - case projectionDimensions = "projection_dim" - case imageSize = "image_size" - case _channels = "num_channels" - case _layerNormEps = "layer_norm_eps" - } - } - - public let textConfiguration: TextConfiguration - public let visionConfiguration: VisionConfiguration - public let modelType: String - public let vocabularySize: Int - public let ignoreIndex: Int - public let imageTokenIndex: Int - public let hiddenSize: Int - public let padTokenId: Int - - enum CodingKeys: String, CodingKey { - case textConfiguration = "text_config" - case visionConfiguration = "vision_config" - case modelType = "model_type" - case vocabularySize = "vocab_size" - case ignoreIndex = "ignore_index" - case imageTokenIndex = "image_token_index" - case hiddenSize = "hidden_size" - case padTokenId = "pad_token_id" - } +@Codable +public struct PaliGemmaConfiguration: Sendable { + + @Codable + public struct TextConfiguration: Sendable { + @CodingKey("model_type") public var modelType: String + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float = 1e-6 + @CodingKey("rope_theta") public var ropeTheta: Float = 10_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + } + + @Codable + public struct VisionConfiguration: Sendable { + @CodingKey("model_type") public var modelType: String + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("patch_size") public var patchSize: Int + @CodingKey("projection_dim") public var projectionDimensions: Int + @CodingKey("image_size") public var imageSize: Int + @CodingKey("num_channels") public var channels: Int = 3 + @CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-6 + } + + @CodingKey("text_config") public var textConfiguration: TextConfiguration + @CodingKey("vision_config") public var visionConfiguration: VisionConfiguration + @CodingKey("model_type") public var modelType: String + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("ignore_index") public var ignoreIndex: Int + @CodingKey("image_token_index") public var imageTokenIndex: Int + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("pad_token_id") public var padTokenId: Int } /// Configuration for ``PaliGemmaProcessor`` -public struct PaliGemmaProcessorConfiguration: Codable, Sendable { +@Codable +public struct PaliGemmaProcessorConfiguration: Sendable { - public struct Size: Codable, Sendable { - public let width: Int - public let height: Int + @Codable + public struct Size: Sendable { + public var width: Int + public var height: Int var cgSize: CGSize { .init(width: width, height: height) } } - public let imageMean: [CGFloat] - public let imageStd: [CGFloat] - public let size: Size - public let 
imageSequenceLength: Int + @CodingKey("image_mean") public var imageMean: [CGFloat] + @CodingKey("image_std") public var imageStd: [CGFloat] + public var size: Size + @CodingKey("image_seq_length") public var imageSequenceLength: Int public var imageMeanTuple: (CGFloat, CGFloat, CGFloat) { (imageMean[0], imageMean[1], imageMean[2]) @@ -719,11 +683,4 @@ public struct PaliGemmaProcessorConfiguration: Codable, Sendable { public var imageStdTuple: (CGFloat, CGFloat, CGFloat) { (imageStd[0], imageStd[1], imageStd[2]) } - - enum CodingKeys: String, CodingKey { - case imageMean = "image_mean" - case imageStd = "image_std" - case size - case imageSequenceLength = "image_seq_length" - } } diff --git a/Libraries/MLXVLM/Models/Qwen25VL.swift b/Libraries/MLXVLM/Models/Qwen25VL.swift index cfd346e4..33cbbc6d 100644 --- a/Libraries/MLXVLM/Models/Qwen25VL.swift +++ b/Libraries/MLXVLM/Models/Qwen25VL.swift @@ -6,6 +6,7 @@ import Hub import MLX import MLXLMCommon import MLXNN +import ReerCodable import Tokenizers // MARK: - Language @@ -904,125 +905,62 @@ public class Qwen25VL: Module, VLMModel, KVCacheDimensionProvider { /// Configuration for ``Qwen25VL`` public struct Qwen25VLConfiguration: Codable, Sendable { - public struct TextConfiguration: Codable, Sendable { - public let modelType: String - public let hiddenSize: Int - public let hiddenLayers: Int - public let intermediateSize: Int - public let attentionHeads: Int - private let _rmsNormEps: Float? - public var rmsNormEps: Float { _rmsNormEps ?? 1e-6 } - public let vocabularySize: Int - public let kvHeads: Int - private let _maxPositionEmbeddings: Int? - public var maxPositionEmbeddings: Int { _maxPositionEmbeddings ?? 128000 } - private let _ropeTheta: Float? - public var ropeTheta: Float { _ropeTheta ?? 1_000_000 } - private let _ropeTraditional: Bool? - public var ropeTraditional: Bool { _ropeTraditional ?? false } - public let ropeScaling: [String: StringOrNumber]? - private let _tieWordEmbeddings: Bool? - public var tieWordEmbeddings: Bool { _tieWordEmbeddings ?? true } - private let _slidingWindow: Int? - public var slidingWindow: Int { _slidingWindow ?? 32768 } - private let _useSlidingWindow: Bool? - public var useSlidingWindow: Bool { _useSlidingWindow ?? 
false } - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case _rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case _maxPositionEmbeddings = "max_position_embeddings" - case _ropeTheta = "rope_theta" - case _ropeTraditional = "rope_traditional" - case ropeScaling = "rope_scaling" - case _tieWordEmbeddings = "tie_word_embeddings" - case _slidingWindow = "sliding_window" - case _useSlidingWindow = "use_sliding_window" - } + @Codable + public struct TextConfiguration: Sendable { + @CodingKey("model_type") public var modelType: String + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float = 1e-6 + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 128000 + @CodingKey("rope_theta") public var ropeTheta: Float = 1_000_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = true + @CodingKey("sliding_window") public var slidingWindow: Int = 32768 + @CodingKey("use_sliding_window") public var useSlidingWindow: Bool = false } - public struct VisionConfiguration: Codable, Sendable { - public let depth: Int - public let hiddenSize: Int - public let intermediateSize: Int - public let outHiddenSize: Int - public let numHeads: Int - public let patchSize: Int - private let _inChans: Int? - public var inChannels: Int { _inChans ?? 3 } - private let _layerNormEps: Float? - public var layerNormEps: Float { _layerNormEps ?? 1e-6 } - public let spatialPatchSize: Int - public let spatialMergeSize: Int - public let temporalPatchSize: Int - public let windowSize: Int - public let fullattBlockIndexes: [Int] - public let tokensPerSecond: Int - private let _skipVision: Bool? - public var skipVision: Bool { _skipVision ?? false } - private let _hiddenAct: String? - public var hiddenAct: String { _hiddenAct ?? 
"silu" } - - enum CodingKeys: String, CodingKey { - case depth - case hiddenSize = "hidden_size" - case intermediateSize = "intermediate_size" - case outHiddenSize = "out_hidden_size" - case numHeads = "num_heads" - case patchSize = "patch_size" - case _inChans = "in_chans" - case _layerNormEps = "layer_norm_eps" // Added this line - case spatialPatchSize = "spatial_patch_size" - case spatialMergeSize = "spatial_merge_size" - case temporalPatchSize = "temporal_patch_size" - case windowSize = "window_size" - case fullattBlockIndexes = "fullatt_block_indexes" - case tokensPerSecond = "tokens_per_second" - case _skipVision = "skip_vision" - case _hiddenAct = "hidden_act" - } + @Codable + public struct VisionConfiguration: Sendable { + public var depth: Int + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("out_hidden_size") public var outHiddenSize: Int + @CodingKey("num_heads") public var numHeads: Int + @CodingKey("patch_size") public var patchSize: Int + @CodingKey("in_chans") public var inChannels: Int = 3 + @CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-6 + @CodingKey("spatial_patch_size") public var spatialPatchSize: Int + @CodingKey("spatial_merge_size") public var spatialMergeSize: Int + @CodingKey("temporal_patch_size") public var temporalPatchSize: Int + @CodingKey("window_size") public var windowSize: Int + @CodingKey("fullatt_block_indexes") public var fullattBlockIndexes: [Int] + @CodingKey("tokens_per_second") public var tokensPerSecond: Int + @CodingKey("skip_vision") public var skipVision: Bool = false + @CodingKey("hidden_act") public var hiddenAct: String = "silu" } + @Codable public struct BaseConfiguration: Codable, Sendable { - public let modelType: String - public let vocabularySize: Int - public let imageTokenId: Int - public let videoTokenId: Int - public let visionStartTokenId: Int - public let visionEndTokenId: Int - public let visionTokenId: Int - public let hiddenSize: Int - public let numAttentionHeads: Int - public let numHiddenLayers: Int - public let intermediateSize: Int - public let numKeyValueHeads: Int - public let slidingWindow: Int - public let useSlidingWindow: Bool - public let maxWindowLayers: Int - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case vocabularySize = "vocab_size" - case imageTokenId = "image_token_id" - case videoTokenId = "video_token_id" - case visionStartTokenId = "vision_start_token_id" - case visionEndTokenId = "vision_end_token_id" - case visionTokenId = "vision_token_id" - case hiddenSize = "hidden_size" - case numAttentionHeads = "num_attention_heads" - case numHiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case numKeyValueHeads = "num_key_value_heads" - case slidingWindow = "sliding_window" - case useSlidingWindow = "use_sliding_window" - case maxWindowLayers = "max_window_layers" - } + @CodingKey("model_type") public var modelType: String + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("image_token_id") public var imageTokenId: Int + @CodingKey("video_token_id") public var videoTokenId: Int + @CodingKey("vision_start_token_id") public var visionStartTokenId: Int + @CodingKey("vision_end_token_id") public var visionEndTokenId: Int + @CodingKey("vision_token_id") public var visionTokenId: Int + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_attention_heads") public var numAttentionHeads: Int + 
@CodingKey("num_hidden_layers") public var numHiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_key_value_heads") public var numKeyValueHeads: Int + @CodingKey("sliding_window") public var slidingWindow: Int + @CodingKey("use_sliding_window") public var useSlidingWindow: Bool + @CodingKey("max_window_layers") public var maxWindowLayers: Int } public let textConfiguration: TextConfiguration @@ -1044,6 +982,14 @@ public struct Qwen25VLConfiguration: Codable, Sendable { self.textConfiguration = try TextConfiguration(from: decoder) self.baseConfiguration = try BaseConfiguration(from: decoder) } + + public func encode(to encoder: any Encoder) throws { + var container = try encoder.container(keyedBy: CodingKeys.self) + + try container.encode(visionConfiguration, forKey: .visionConfiguration) + try textConfiguration.encode(to: encoder) + try baseConfiguration.encode(to: encoder) + } } /// Configuration for ``Qwen25VLProcessor`` diff --git a/Libraries/MLXVLM/Models/Qwen2VL.swift b/Libraries/MLXVLM/Models/Qwen2VL.swift index 8d735054..f5fae762 100644 --- a/Libraries/MLXVLM/Models/Qwen2VL.swift +++ b/Libraries/MLXVLM/Models/Qwen2VL.swift @@ -8,6 +8,7 @@ import Hub import MLX import MLXLMCommon import MLXNN +import ReerCodable import Tokenizers // MARK: - Language @@ -746,87 +747,45 @@ public class Qwen2VL: Module, VLMModel, KVCacheDimensionProvider { /// Configuration for ``Qwen2VL`` public struct Qwen2VLConfiguration: Codable, Sendable { - public struct TextConfiguration: Codable, Sendable { - public let modelType: String - public let hiddenSize: Int - public let hiddenLayers: Int - public let intermediateSize: Int - public let attentionHeads: Int - private let _rmsNormEps: Float? - public var rmsNormEps: Float { _rmsNormEps ?? 1e-6 } - public let vocabularySize: Int - public let kvHeads: Int - private let _maxPositionEmbeddings: Int? - public var maxpPositionEmbeddings: Int { _maxPositionEmbeddings ?? 32768 } - private let _ropeTheta: Float? - public var ropeTheta: Float { _ropeTheta ?? 1_000_000 } - private let _ropeTraditional: Bool? - public var ropeTraditional: Bool { _ropeTraditional ?? false } - public let ropeScaling: [String: StringOrNumber]? - private let _tieWordEmbeddings: Bool? - public var tieWordEmbeddings: Bool { _tieWordEmbeddings ?? true } - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case _rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case _maxPositionEmbeddings = "max_position_embeddings" - case _ropeTheta = "rope_theta" - case _ropeTraditional = "rope_traditional" - case ropeScaling = "rope_scaling" - case _tieWordEmbeddings = "tie_word_embeddings" - } - } - - public struct VisionConfiguration: Codable, Sendable { - public let depth: Int - public let embedDimensions: Int - public let hiddenSize: Int - public let numHeads: Int - public let patchSize: Int - public let mlpRatio: Float - public let _inChannels: Int? - public var inChannels: Int { _inChannels ?? 3 } - public let _layerNormEps: Float? - public var layerNormEps: Float { _layerNormEps ?? 
1e-6 } - public let spatialPatchSize: Int - public let spatialMergeSize: Int - public let temporalPatchSize: Int - - enum CodingKeys: String, CodingKey { - case depth - case embedDimensions = "embed_dim" - case hiddenSize = "hidden_size" - case numHeads = "num_heads" - case patchSize = "patch_size" - case mlpRatio = "mlp_ratio" - case _inChannels = "in_channels" - case _layerNormEps = "layer_norm_eps" - case spatialPatchSize = "spatial_patch_size" - case spatialMergeSize = "spatial_merge_size" - case temporalPatchSize = "temporal_patch_size" - } - } - - public struct BaseConfiguration: Codable, Sendable { - public let modelType: String - public let vocabularySize: Int - public let imageTokenId: Int - public let videoTokenId: Int - public let hiddenSize: Int - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case vocabularySize = "vocab_size" - case imageTokenId = "image_token_id" - case videoTokenId = "video_token_id" - case hiddenSize = "hidden_size" - } + @Codable + public struct TextConfiguration: Sendable { + @CodingKey("model_type") public var modelType: String + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float = 1e-6 + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("max_position_embeddings") public var maxpPositionEmbeddings: Int = 32768 + @CodingKey("rope_theta") public var ropeTheta: Float = 1_000_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? 
+ @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = true + } + + @Codable + public struct VisionConfiguration: Sendable { + public var depth: Int + @CodingKey("embed_dim") public var embedDimensions: Int + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_heads") public var numHeads: Int + @CodingKey("patch_size") public var patchSize: Int + @CodingKey("mlp_ratio") public var mlpRatio: Float + @CodingKey("in_channels") public var inChannels: Int = 3 + @CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-6 + @CodingKey("spatial_patch_size") public var spatialPatchSize: Int + @CodingKey("spatial_merge_size") public var spatialMergeSize: Int + @CodingKey("temporal_patch_size") public var temporalPatchSize: Int + } + + @Codable + public struct BaseConfiguration: Sendable { + @CodingKey("model_type") public var modelType: String + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("image_token_id") public var imageTokenId: Int + @CodingKey("video_token_id") public var videoTokenId: Int + @CodingKey("hidden_size") public var hiddenSize: Int } public let textConfiguration: TextConfiguration @@ -848,30 +807,34 @@ public struct Qwen2VLConfiguration: Codable, Sendable { self.textConfiguration = try TextConfiguration(from: decoder) self.baseConfiguration = try BaseConfiguration(from: decoder) } + + public func encode(to encoder: any Encoder) throws { + var container = try encoder.container(keyedBy: CodingKeys.self) + + try container.encode(visionConfiguration, forKey: .visionConfiguration) + try textConfiguration.encode(to: encoder) + try baseConfiguration.encode(to: encoder) + } } /// Configuration for ``Qwen2VLProcessor`` -public struct Qwen2VLProcessorConfiguration: Codable, Sendable { +@Codable +public struct Qwen2VLProcessorConfiguration: Sendable { - public struct Size: Codable, Sendable { - public let maxPixels: Int - public let minPixels: Int - - enum CodingKeys: String, CodingKey { - case maxPixels = "max_pixels" - case minPixels = "min_pixels" - } + @Codable + public struct Size: Sendable { + @CodingKey("max_pixels") public var maxPixels: Int + @CodingKey("min_pixels") public var minPixels: Int } - public let imageMean: [CGFloat] - public let imageStd: [CGFloat] - public let mergeSize: Int - public let patchSize: Int - public let temporalPatchSize: Int - - private let _size: Size? - private let _maxPixels: Int? - private let _minPixels: Int? + @CodingKey("image_mean") public var imageMean: [CGFloat] + @CodingKey("image_std") public var imageStd: [CGFloat] + @CodingKey("merge_size") public var mergeSize: Int + @CodingKey("patch_size") public var patchSize: Int + @CodingKey("temporal_patch_size") public var temporalPatchSize: Int + @CodingKey("max_pixels") private var _maxPixels: Int? + @CodingKey("min_pixels") private var _minPixels: Int? + @CodingKey("size") private var _size: Size? public var minPixels: Int { _minPixels ?? _size?.minPixels ?? 
3136 @@ -886,17 +849,6 @@ public struct Qwen2VLProcessorConfiguration: Codable, Sendable { public var imageStdTuple: (CGFloat, CGFloat, CGFloat) { (imageStd[0], imageStd[1], imageStd[2]) } - - enum CodingKeys: String, CodingKey { - case imageMean = "image_mean" - case imageStd = "image_std" - case mergeSize = "merge_size" - case patchSize = "patch_size" - case temporalPatchSize = "temporal_patch_size" - case _maxPixels = "max_pixels" - case _minPixels = "min_pixels" - case _size = "size" - } } /// Message Generator for Qwen2VL diff --git a/Libraries/MLXVLM/Models/SmolVLM2.swift b/Libraries/MLXVLM/Models/SmolVLM2.swift index b75a9717..b6756038 100644 --- a/Libraries/MLXVLM/Models/SmolVLM2.swift +++ b/Libraries/MLXVLM/Models/SmolVLM2.swift @@ -10,6 +10,7 @@ import CoreMedia import Foundation import MLX import MLXLMCommon +import ReerCodable import Tokenizers // MARK: - Configuration and modeling are Idefics3 @@ -18,47 +19,28 @@ typealias SmolVLM2Configuration = Idefics3Configuration typealias SmolVLM2 = Idefics3 // MARK: - SmolVLMProcessor and configuration +@Codable +public struct SmolVLMProcessorConfiguration: Sendable { -public struct SmolVLMProcessorConfiguration: Codable, Sendable { - public struct Size: Codable, Sendable { - public let longestEdge: Int - enum CodingKeys: String, CodingKey { - case longestEdge = "longest_edge" - } + @Codable + public struct Size: Sendable { + @CodingKey("longest_edge") public var longestEdge: Int } - public struct VideoSampling: Codable, Sendable { - public let fps: Int - public let maxFrames: Int + @Codable + public struct VideoSampling: Sendable { + public var fps: Int + @CodingKey("max_frames") public var maxFrames: Int // Intentionally ignoring videoSize because I believe it's still wrong in the config files // public let videoSize: Size - - enum CodingKeys: String, CodingKey { - case fps - case maxFrames = "max_frames" - } } - public let imageMean: [CGFloat] - public let imageStd: [CGFloat] - public let size: Size - public let maxImageSize: Size - public let videoSampling: VideoSampling - private let _imageSequenceLength: Int? - // TODO: this does not come in preprocessor_config.json, verify where transformers gets it from - public var imageSequenceLength: Int { _imageSequenceLength ?? 64 } - - init( - imageMean: [CGFloat], imageStd: [CGFloat], size: Size, maxImageSize: Size, - videoSampling: VideoSampling, imageSequenceLength: Int? 
- ) { - self.imageMean = imageMean - self.imageStd = imageStd - self.size = size - self.maxImageSize = maxImageSize - self.videoSampling = videoSampling - self._imageSequenceLength = imageSequenceLength - } + @CodingKey("image_mean") public var imageMean: [CGFloat] + @CodingKey("image_std") public var imageStd: [CGFloat] + public var size: Size + @CodingKey("max_image_size") public var maxImageSize: Size + @CodingKey("video_sampling") public var videoSampling: VideoSampling + @CodingKey("image_seq_len") public var imageSequenceLength: Int public var imageMeanTuple: (CGFloat, CGFloat, CGFloat) { (imageMean[0], imageMean[1], imageMean[2]) @@ -66,15 +48,6 @@ public struct SmolVLMProcessorConfiguration: Codable, Sendable { public var imageStdTuple: (CGFloat, CGFloat, CGFloat) { (imageStd[0], imageStd[1], imageStd[2]) } - - enum CodingKeys: String, CodingKey { - case imageMean = "image_mean" - case imageStd = "image_std" - case size - case maxImageSize = "max_image_size" - case videoSampling = "video_sampling" - case _imageSequenceLength = "image_seq_len" - } } public class SmolVLMProcessor: UserInputProcessor { diff --git a/Libraries/StableDiffusion/Configuration.swift b/Libraries/StableDiffusion/Configuration.swift index c39a06ff..e8afa346 100644 --- a/Libraries/StableDiffusion/Configuration.swift +++ b/Libraries/StableDiffusion/Configuration.swift @@ -7,7 +7,7 @@ import MLXNN // port of https://github.com/ml-explore/mlx-examples/blob/main/stable_diffusion/stable_diffusion/config.py /// Configuration for ``Autoencoder`` -struct AutoencoderConfiguration: Codable { +struct AutoencoderConfiguration: Codable, Sendable { public var inputChannels = 3 public var outputChannels = 3 @@ -60,7 +60,7 @@ struct AutoencoderConfiguration: Codable { } /// Configuration for ``CLIPTextModel`` -struct CLIPTextModelConfiguration: Codable { +struct CLIPTextModelConfiguration: Codable, Sendable { public enum ClipActivation: String, Codable { case fast = "quick_gelu" @@ -137,7 +137,7 @@ struct CLIPTextModelConfiguration: Codable { } /// Configuration for ``UNetModel`` -struct UNetConfiguration: Codable { +struct UNetConfiguration: Codable, Sendable { public var inputChannels = 4 public var outputChannels = 4 @@ -250,7 +250,7 @@ struct UNetConfiguration: Codable { } /// Configuration for ``StableDiffusion`` -public struct DiffusionConfiguration: Codable { +public struct DiffusionConfiguration: Codable, Sendable { public enum BetaSchedule: String, Codable { case linear = "linear" diff --git a/Package.swift b/Package.swift index 4233f481..1affcee1 100644 --- a/Package.swift +++ b/Package.swift @@ -31,6 +31,7 @@ let package = Package( .package( url: "https://github.com/huggingface/swift-transformers", .upToNextMinor(from: "0.1.21") ), + .package(url: "https://github.com/reers/ReerCodable.git", from: "1.2.3"), .package(url: "https://github.com/1024jp/GzipSwift", "6.0.1" ... 
"6.0.1"), // Only needed by MLXMNIST ], targets: [ @@ -44,6 +45,7 @@ let package = Package( .product(name: "MLXOptimizers", package: "mlx-swift"), .product(name: "MLXRandom", package: "mlx-swift"), .product(name: "Transformers", package: "swift-transformers"), + .product(name: "ReerCodable", package: "ReerCodable"), ], path: "Libraries/MLXLLM", exclude: [ @@ -63,6 +65,7 @@ let package = Package( .product(name: "MLXOptimizers", package: "mlx-swift"), .product(name: "MLXRandom", package: "mlx-swift"), .product(name: "Transformers", package: "swift-transformers"), + .product(name: "ReerCodable", package: "ReerCodable"), ], path: "Libraries/MLXVLM", exclude: [ @@ -81,6 +84,7 @@ let package = Package( .product(name: "MLXRandom", package: "mlx-swift"), .product(name: "MLXLinalg", package: "mlx-swift"), .product(name: "Transformers", package: "swift-transformers"), + .product(name: "ReerCodable", package: "ReerCodable"), ], path: "Libraries/MLXLMCommon", exclude: [ @@ -150,6 +154,7 @@ let package = Package( .product(name: "MLXNN", package: "mlx-swift"), .product(name: "MLXRandom", package: "mlx-swift"), .product(name: "Transformers", package: "swift-transformers"), + .product(name: "ReerCodable", package: "ReerCodable"), ], path: "Libraries/StableDiffusion", exclude: [ diff --git a/mlx-swift-examples.xcodeproj/project.pbxproj b/mlx-swift-examples.xcodeproj/project.pbxproj index df8f9ccd..2c0f3985 100644 --- a/mlx-swift-examples.xcodeproj/project.pbxproj +++ b/mlx-swift-examples.xcodeproj/project.pbxproj @@ -1042,6 +1042,7 @@ C397D8F22CD2F60B00B87EE2 /* XCLocalSwiftPackageReference "Libraries/.." */, C32A18442D00E13E0092A5B6 /* XCRemoteSwiftPackageReference "mlx-swift" */, C32B4C6B2DA7132C00EF663D /* XCRemoteSwiftPackageReference "swift-async-algorithms" */, + C3FF946F2DD54E170070900D /* XCRemoteSwiftPackageReference "ReerCodable" */, ); productRefGroup = C39273752B606A0A00368D5D /* Products */; projectDirPath = ""; @@ -3285,6 +3286,14 @@ minimumVersion = 1.4.0; }; }; + C3FF946F2DD54E170070900D /* XCRemoteSwiftPackageReference "ReerCodable" */ = { + isa = XCRemoteSwiftPackageReference; + repositoryURL = "https://github.com/reers/ReerCodable.git"; + requirement = { + kind = upToNextMajorVersion; + minimumVersion = 1.2.3; + }; + }; /* End XCRemoteSwiftPackageReference section */ /* Begin XCSwiftPackageProductDependency section */ diff --git a/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index ee2466cb..510a7fe6 100644 --- a/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -46,6 +46,15 @@ "version" : "0.4.0" } }, + { + "identity" : "reercodable", + "kind" : "remoteSourceControl", + "location" : "https://github.com/reers/ReerCodable.git", + "state" : { + "revision" : "a7b1eb0e93da899163f929e15608c325845e3430", + "version" : "1.2.3" + } + }, { "identity" : "swift-argument-parser", "kind" : "remoteSourceControl", @@ -100,6 +109,15 @@ "version" : "1.0.3" } }, + { + "identity" : "swift-syntax", + "kind" : "remoteSourceControl", + "location" : "https://github.com/swiftlang/swift-syntax.git", + "state" : { + "revision" : "f99ae8aa18f0cf0d53481901f88a0991dc3bd4a2", + "version" : "601.0.1" + } + }, { "identity" : "swift-transformers", "kind" : "remoteSourceControl",