diff --git a/.circleci/config.yml b/.circleci/config.yml index 5a97f48a..8ffa27c1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -38,7 +38,7 @@ jobs: xcrun --show-sdk-build-version swift --version find . -name Package.resolved -exec rm {} \; - xcodebuild test -scheme mlx-libraries-Package -destination 'platform=OS X' + xcodebuild test -scheme mlx-libraries-Package -destination 'platform=OS X' -skipMacroValidation - run: name: Build Examples command: | @@ -46,9 +46,9 @@ jobs: xcrun --show-sdk-build-version swift --version find . -name Package.resolved -exec rm {} \; - xcodebuild -scheme llm-tool - xcodebuild -scheme image-tool - xcodebuild -scheme mnist-tool + xcodebuild -scheme llm-tool -skipMacroValidation + xcodebuild -scheme image-tool -skipMacroValidation + xcodebuild -scheme mnist-tool -skipMacroValidation workflows: build_and_test: diff --git a/Libraries/Embedders/Pooling.swift b/Libraries/Embedders/Pooling.swift index 912f37b8..c8c65074 100644 --- a/Libraries/Embedders/Pooling.swift +++ b/Libraries/Embedders/Pooling.swift @@ -5,7 +5,7 @@ import MLX import MLXLinalg import MLXNN -public struct PoolingConfiguration: Codable { +public struct PoolingConfiguration: Codable, Sendable { public let dimension: Int public let poolingModeClsToken: Bool public let poolingModeMeanTokens: Bool diff --git a/Libraries/MLXLLM/Lora+Data.swift b/Libraries/MLXLLM/Lora+Data.swift index 975e41f4..defa8f5a 100644 --- a/Libraries/MLXLLM/Lora+Data.swift +++ b/Libraries/MLXLLM/Lora+Data.swift @@ -48,7 +48,7 @@ public func loadLoRAData(url: URL) throws -> [String] { func loadJSONL(url: URL) throws -> [String] { - struct Line: Codable { + struct Line: Codable, Sendable { let text: String? } diff --git a/Libraries/MLXLLM/Models/Cohere.swift b/Libraries/MLXLLM/Models/Cohere.swift index 5a8c1bed..647d358f 100644 --- a/Libraries/MLXLLM/Models/Cohere.swift +++ b/Libraries/MLXLLM/Models/Cohere.swift @@ -2,8 +2,9 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable -// port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/cohere.py +// port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/cohere.py private class Attention: Module { @@ -168,63 +169,21 @@ public class CohereModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct CohereConfiguration: Codable, Sendable { - - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var layerNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var ropeTheta: Float = 8000000.0 - var ropeTraditional: Bool = true - var ropeScaling: [String: StringOrNumber]? 
= nil
-    var logitScale: Float
-
-    enum CodingKeys: String, CodingKey {
-        case hiddenSize = "hidden_size"
-        case hiddenLayers = "num_hidden_layers"
-        case intermediateSize = "intermediate_size"
-        case attentionHeads = "num_attention_heads"
-        case kvHeads = "num_key_value_heads"
-        case ropeTheta = "rope_theta"
-        case vocabularySize = "vocab_size"
-        case layerNormEps = "layer_norm_eps"
-        case logitScale = "logit_scale"
-        case ropeTraditional = "rope_traditional"
-        case ropeScaling = "rope_scaling"
-    }
+@Codable
+public struct CohereConfiguration: Sendable {
+
+    @CodingKey("hidden_size") public var hiddenSize: Int = 8192
+    @CodingKey("num_hidden_layers") public var hiddenLayers: Int = 40
+    @CodingKey("intermediate_size") public var intermediateSize: Int = 22528
+    @CodingKey("num_attention_heads") public var attentionHeads: Int = 64
+    @CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-5
+    @CodingKey("vocab_size") public var vocabularySize: Int = 256000
+    @CodingKey("num_key_value_heads") public var kvHeads: Int = 64
+    @CodingKey("rope_theta") public var ropeTheta: Float = 8000000.0
+    @CodingKey("rope_traditional") public var ropeTraditional: Bool = true
+    @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? = nil
+    @CodingKey("logit_scale") public var logitScale: Float = 0.0625
-
-    public init(from decoder: Decoder) throws {
-        // custom implementation to handle optional keys with required values
-        let container: KeyedDecodingContainer =
-            try decoder.container(
-                keyedBy: CohereConfiguration.CodingKeys.self)
-
-        self.hiddenSize = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.hiddenSize)
-        self.hiddenLayers = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.hiddenLayers)
-        self.intermediateSize = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.intermediateSize)
-        self.attentionHeads = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.attentionHeads)
-        self.layerNormEps = try container.decode(
-            Float.self, forKey: CohereConfiguration.CodingKeys.layerNormEps)
-        self.vocabularySize = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.vocabularySize)
-        self.kvHeads = try container.decode(
-            Int.self, forKey: CohereConfiguration.CodingKeys.kvHeads)
-        self.ropeTheta =
-            try container.decodeIfPresent(
-                Float.self, forKey: CohereConfiguration.CodingKeys.ropeTheta)
-            ?? 
8000000.0 - self.ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: CohereConfiguration.CodingKeys.ropeScaling) - self.logitScale = try container.decode( - Float.self, forKey: CohereConfiguration.CodingKeys.logitScale) - } } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/GLM4.swift b/Libraries/MLXLLM/Models/GLM4.swift index 919550ba..46f865d9 100644 --- a/Libraries/MLXLLM/Models/GLM4.swift +++ b/Libraries/MLXLLM/Models/GLM4.swift @@ -9,6 +9,7 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable // port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/glm4.py @@ -164,7 +165,6 @@ public class GLM4Model: Module, LLMModel, KVCacheDimensionProvider { private let model: GLM4ModelInner let configuration: GLM4Configuration - let modelType: String @ModuleInfo(key: "lm_head") var lmHead: Linear @@ -172,7 +172,6 @@ public class GLM4Model: Module, LLMModel, KVCacheDimensionProvider { self.configuration = args self.vocabularySize = args.vocabularySize self.kvHeads = (0 ..< args.hiddenLayers).map { _ in args.kvHeads } - self.modelType = args.modelType self.model = GLM4ModelInner(args) _lmHead.wrappedValue = Linear(args.hiddenSize, args.vocabularySize, bias: false) @@ -194,80 +193,22 @@ public class GLM4Model: Module, LLMModel, KVCacheDimensionProvider { } } -public struct GLM4Configuration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var attentionBias: Bool - var headDim: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var partialRotaryFactor: Float - var ropeTheta: Float = 10000.0 - var ropeTraditional: Bool = true - var tieWordEmbeddings = false - var maxPositionEmbeddings: Int = 32768 - var modelType: String - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case attentionBias = "attention_bias" - case headDim = "head_dim" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case partialRotaryFactor = "partial_rotary_factor" - case ropeTheta = "rope_theta" - case ropeTraditional = "rope_traditional" - case tieWordEmbeddings = "tie_word_embeddings" - case maxPositionEmbeddings = "max_position_embeddings" - case modelType = "model_type" - } - - public init(from decoder: Decoder) throws { - let container: KeyedDecodingContainer = - try decoder.container( - keyedBy: GLM4Configuration.CodingKeys.self) - - self.modelType = try container.decode( - String.self, forKey: GLM4Configuration.CodingKeys.modelType) - self.hiddenSize = try container.decode( - Int.self, forKey: GLM4Configuration.CodingKeys.hiddenSize) - self.hiddenLayers = try container.decode( - Int.self, forKey: GLM4Configuration.CodingKeys.hiddenLayers) - self.intermediateSize = try container.decode( - Int.self, forKey: GLM4Configuration.CodingKeys.intermediateSize) - self.attentionHeads = try container.decode( - Int.self, forKey: GLM4Configuration.CodingKeys.attentionHeads) - self.attentionBias = try container.decode( - Bool.self, forKey: GLM4Configuration.CodingKeys.attentionBias) - self.headDim = try container.decode( - Int.self, forKey: GLM4Configuration.CodingKeys.headDim) - self.rmsNormEps = try container.decode( - Float.self, forKey: GLM4Configuration.CodingKeys.rmsNormEps) - self.vocabularySize = try container.decode( - Int.self, 
forKey: GLM4Configuration.CodingKeys.vocabularySize) - self.kvHeads = try container.decode(Int.self, forKey: GLM4Configuration.CodingKeys.kvHeads) - self.partialRotaryFactor = try container.decode( - Float.self, forKey: GLM4Configuration.CodingKeys.partialRotaryFactor) - self.ropeTheta = - try container.decodeIfPresent( - Float.self, forKey: GLM4Configuration.CodingKeys.ropeTheta) - ?? 10000.0 - self.ropeTraditional = - try container.decodeIfPresent( - Bool.self, forKey: GLM4Configuration.CodingKeys.ropeTraditional) - ?? true - self.tieWordEmbeddings = - try container.decodeIfPresent(Bool.self, forKey: .tieWordEmbeddings) ?? false - self.maxPositionEmbeddings = - try container.decodeIfPresent(Int.self, forKey: .maxPositionEmbeddings) ?? 32768 - } +@Codable +public struct GLM4Configuration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("attention_bias") public var attentionBias: Bool + @CodingKey("head_dim") public var headDim: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("partial_rotary_factor") public var partialRotaryFactor: Float + @CodingKey("rope_theta") public var ropeTheta: Float = 10000.0 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = true + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings = false + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 32768 } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Gemma.swift b/Libraries/MLXLLM/Models/Gemma.swift index 1818456e..126d3b44 100644 --- a/Libraries/MLXLLM/Models/Gemma.swift +++ b/Libraries/MLXLLM/Models/Gemma.swift @@ -4,9 +4,10 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable import Tokenizers -// Port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/gemma.py +// Port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/gemma.py // Specialized norm for Gemma private class RMSNorm: Module, UnaryLayer { @@ -174,11 +175,9 @@ public class GemmaModel: Module, LLMModel, KVCacheDimensionProvider { public let vocabularySize: Int public let kvHeads: [Int] - let modelType: String private let model: GemmaModelInner public init(_ args: GemmaConfiguration) { - self.modelType = args.modelType self.vocabularySize = args.vocabularySize self.kvHeads = Array(repeating: args.kvHeads, count: args.hiddenLayers) self.model = GemmaModelInner(args) @@ -194,34 +193,18 @@ public class GemmaModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct GemmaConfiguration: Codable, Sendable { - var modelType: String - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var headDimensions: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - private let _ropeTheta: Float? - public var ropeTheta: Float { _ropeTheta ?? 10_000 } - private let _ropeTraditional: Bool? - public var ropeTraditional: Bool { _ropeTraditional ?? 
false } - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case headDimensions = "head_dim" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case _ropeTheta = "rope_theta" - case _ropeTraditional = "rope_traditional" - } +@Codable +public struct GemmaConfiguration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("head_dim") public var headDimensions: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("rope_theta") public var ropeTheta: Float = 10_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Gemma2.swift b/Libraries/MLXLLM/Models/Gemma2.swift index 561477c1..bf1ba415 100644 --- a/Libraries/MLXLLM/Models/Gemma2.swift +++ b/Libraries/MLXLLM/Models/Gemma2.swift @@ -4,9 +4,10 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable import Tokenizers -// Port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/gemma2.py +// Port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/gemma2.py private class Attention: Module { let args: Gemma2Configuration @@ -203,70 +204,21 @@ public class Gemma2Model: Module, LLMModel, KVCacheDimensionProvider { } } -public struct Gemma2Configuration: Codable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var headDimensions: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var ropeTheta: Float = 10_000 - var ropeTraditional: Bool = false - var attnLogitSoftcapping: Float = 50.0 - var finalLogitSoftcapping: Float = 30.0 - var queryPreAttnScalar: Float = 144.0 - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case headDimensions = "head_dim" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case ropeTheta = "rope_theta" - case ropeTraditional = "rope_traditional" - case attnLogitSoftcapping = "attn_logit_softcapping" - case finalLogitSoftcapping = "final_logit_softcapping" - case queryPreAttnScalar = "query_pre_attn_scalar" - } - - public init(from decoder: Swift.Decoder) throws { - // Custom implementation to handle optional keys with required values - let container: KeyedDecodingContainer = try decoder.container( - keyedBy: CodingKeys.self) - - self.hiddenSize = try container.decode( - Int.self, forKey: CodingKeys.hiddenSize) - self.hiddenLayers = try container.decode( - Int.self, forKey: CodingKeys.hiddenLayers) - self.intermediateSize = try container.decode( - Int.self, forKey: CodingKeys.intermediateSize) - self.attentionHeads = try container.decode( - Int.self, forKey: CodingKeys.attentionHeads) - self.headDimensions = try container.decode( - Int.self, forKey: CodingKeys.headDimensions) - 
self.rmsNormEps = try container.decode( - Float.self, forKey: CodingKeys.rmsNormEps) - self.vocabularySize = try container.decode( - Int.self, forKey: CodingKeys.vocabularySize) - self.kvHeads = try container.decode(Int.self, forKey: CodingKeys.kvHeads) - self.ropeTheta = - try container.decodeIfPresent(Float.self, forKey: CodingKeys.ropeTheta) - ?? 10_000 - self.ropeTraditional = - try container.decodeIfPresent( - Bool.self, forKey: CodingKeys.ropeTraditional) ?? false - self.attnLogitSoftcapping = try container.decode( - Float.self, forKey: CodingKeys.attnLogitSoftcapping) - self.finalLogitSoftcapping = try container.decode( - Float.self, forKey: CodingKeys.finalLogitSoftcapping) - self.queryPreAttnScalar = try container.decode( - Float.self, forKey: CodingKeys.queryPreAttnScalar) - } +@Codable +public struct Gemma2Configuration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("head_dim") public var headDimensions: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("rope_theta") public var ropeTheta: Float = 10_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("attn_logit_softcapping") public var attnLogitSoftcapping: Float = 50.0 + @CodingKey("final_logit_softcapping") public var finalLogitSoftcapping: Float = 30.0 + @CodingKey("query_pre_attn_scalar") public var queryPreAttnScalar: Float = 144.0 } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Granite.swift b/Libraries/MLXLLM/Models/Granite.swift index f252f9c3..b222e52d 100644 --- a/Libraries/MLXLLM/Models/Granite.swift +++ b/Libraries/MLXLLM/Models/Granite.swift @@ -5,12 +5,13 @@ // Created by Sachin Desai on 4/25/25. // -// Port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/granite.py - import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable + +// Port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/granite.py private class Attention: Module { let args: GraniteConfiguration @@ -210,69 +211,25 @@ public class GraniteModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct GraniteConfiguration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var rmsNormEps: Float - var vocabularySize: Int - var logitsScaling: Float - var attentionMultiplier: Float - var embeddingMultiplier: Float - var residualMultiplier: Float - var maxPositionEmbeddings: Int - var kvHeads: Int - var attentionBias: Bool - var mlpBias: Bool - var ropeTheta: Float - var ropeTraditional: Bool = false - var ropeScaling: [String: StringOrNumber]? 
= nil - var tieWordEmbeddings: Bool = true - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case logitsScaling = "logits_scaling" - case attentionMultiplier = "attention_multiplier" - case embeddingMultiplier = "embedding_multiplier" - case residualMultiplier = "residual_multiplier" - case maxPositionEmbeddings = "max_position_embeddings" - case kvHeads = "num_key_value_heads" - case attentionBias = "attention_bias" - case mlpBias = "mlp_bias" - case ropeTheta = "rope_theta" - case ropeScaling = "rope_scaling" - case tieWordEmbeddings = "tie_word_embeddings" - } - - public init(from decoder: Decoder) throws { - let container: KeyedDecodingContainer = - try decoder.container(keyedBy: GraniteConfiguration.CodingKeys.self) - - self.hiddenSize = try container.decode(Int.self, forKey: .hiddenSize) - self.hiddenLayers = try container.decode(Int.self, forKey: .hiddenLayers) - self.intermediateSize = try container.decode(Int.self, forKey: .intermediateSize) - self.attentionHeads = try container.decode(Int.self, forKey: .attentionHeads) - self.rmsNormEps = try container.decode(Float.self, forKey: .rmsNormEps) - self.vocabularySize = try container.decode(Int.self, forKey: .vocabularySize) - self.logitsScaling = try container.decode(Float.self, forKey: .logitsScaling) - self.attentionMultiplier = try container.decode(Float.self, forKey: .attentionMultiplier) - self.embeddingMultiplier = try container.decode(Float.self, forKey: .embeddingMultiplier) - self.residualMultiplier = try container.decode(Float.self, forKey: .residualMultiplier) - self.maxPositionEmbeddings = try container.decode(Int.self, forKey: .maxPositionEmbeddings) - self.kvHeads = try container.decode(Int.self, forKey: .kvHeads) - self.attentionBias = try container.decode(Bool.self, forKey: .attentionBias) - self.mlpBias = try container.decode(Bool.self, forKey: .mlpBias) ?? false - self.ropeTheta = try container.decodeIfPresent(Float.self, forKey: .ropeTheta) ?? 10000000.0 - self.ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: .ropeScaling) - self.tieWordEmbeddings = try container.decode(Bool.self, forKey: .tieWordEmbeddings) - } +@Codable +public struct GraniteConfiguration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("logits_scaling") public var logitsScaling: Float + @CodingKey("attention_multiplier") public var attentionMultiplier: Float + @CodingKey("embedding_multiplier") public var embeddingMultiplier: Float + @CodingKey("residual_multiplier") public var residualMultiplier: Float + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("attention_bias") public var attentionBias: Bool + @CodingKey("mlp_bias") public var mlpBias: Bool + @CodingKey("rope_theta") public var ropeTheta: Float + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? 
= nil + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = true } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Internlm2.swift b/Libraries/MLXLLM/Models/Internlm2.swift index 8e1057bc..08f7d832 100644 --- a/Libraries/MLXLLM/Models/Internlm2.swift +++ b/Libraries/MLXLLM/Models/Internlm2.swift @@ -4,6 +4,7 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable // Port of https://github.com/maiqingqiang/mlx-examples/blob/main/llms/mlx_lm/models/internlm2.py @@ -236,76 +237,36 @@ extension InternLM2Model: LoRAModel { } } -public struct InternLM2Configuration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var maxPositionEmbeddings: Int = 32768 - var ropeTheta: Float = 10000 - var ropeTraditional: Bool = false - var ropeScaling: [String: StringOrNumber]? - var tieWordEmbeddings: Bool = false - var bias: Bool = true +@Codable +public struct InternLM2Configuration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 32768 + @CodingKey("rope_theta") public var ropeTheta: Float = 10000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = false + @CodingKey("bias") public var bias: Bool = true var kvGroups: Int { attentionHeads / kvHeads } - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case maxPositionEmbeddings = "max_position_embeddings" - case ropeTheta = "rope_theta" - case ropeTraditional = "rope_traditional" - case ropeScaling = "rope_scaling" - case tieWordEmbeddings = "tie_word_embeddings" - case bias = "bias" - } - - public init(from decoder: Decoder) throws { - let container = try decoder.container(keyedBy: CodingKeys.self) - - hiddenSize = try container.decode(Int.self, forKey: .hiddenSize) - hiddenLayers = try container.decode(Int.self, forKey: .hiddenLayers) - intermediateSize = try container.decode(Int.self, forKey: .intermediateSize) - attentionHeads = try container.decode(Int.self, forKey: .attentionHeads) - rmsNormEps = try container.decode(Float.self, forKey: .rmsNormEps) - vocabularySize = try container.decode(Int.self, forKey: .vocabularySize) - kvHeads = try container.decodeIfPresent(Int.self, forKey: .kvHeads) ?? 
attentionHeads - maxPositionEmbeddings = try container.decode(Int.self, forKey: .maxPositionEmbeddings) - if let ropeTheta = try container.decodeIfPresent(Float.self, forKey: .ropeTheta) { - self.ropeTheta = ropeTheta - } - if let ropeTraditional = try container.decodeIfPresent(Bool.self, forKey: .ropeTraditional) - { - self.ropeTraditional = ropeTraditional - } - ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: .ropeScaling) - if let tieWordEmbeddings = try container.decodeIfPresent( - Bool.self, forKey: .tieWordEmbeddings) - { - self.tieWordEmbeddings = tieWordEmbeddings - } - if let bias = try container.decodeIfPresent(Bool.self, forKey: .bias) { - self.bias = bias - } + public func didDecode(from decoder: any Decoder) throws { + let container = try decoder.container(keyedBy: AnyCodingKey.self) + let codingKey = AnyCodingKey("rope_scaling") if let ropeScaling { let requiredKeys: Set = ["factor", "type"] let keys = Set(ropeScaling.keys) if !requiredKeys.isSubset(of: keys) { throw DecodingError.dataCorruptedError( - forKey: .ropeScaling, in: container, + forKey: codingKey, in: container, debugDescription: "rope_scaling must contain keys \(requiredKeys)" ) } @@ -313,7 +274,7 @@ public struct InternLM2Configuration: Codable, Sendable { type != .string("linear") && type != .string("dynamic") { throw DecodingError.dataCorruptedError( - forKey: .ropeScaling, in: container, + forKey: codingKey, in: container, debugDescription: "rope_scaling 'type' currently only supports 'linear' or 'dynamic'" ) diff --git a/Libraries/MLXLLM/Models/Llama.swift b/Libraries/MLXLLM/Models/Llama.swift index ab857234..4fef7ddc 100644 --- a/Libraries/MLXLLM/Models/Llama.swift +++ b/Libraries/MLXLLM/Models/Llama.swift @@ -4,9 +4,10 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable import Tokenizers -// port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/llama.py +// port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/llama.py func computeBaseFrequency( base: Float, dims: Int, ropeType: String, ropeScaling: [String: StringOrNumber]? @@ -336,23 +337,24 @@ public class LlamaModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct LlamaConfiguration: Codable, Sendable { - - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var headDimensions: Int? - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var maxPositionEmbeddings: Int? - var ropeTheta: Float = 10_000 - var ropeTraditional: Bool = false - var ropeScaling: [String: StringOrNumber]? - var tieWordEmbeddings: Bool = true - var attentionBias: Bool = false - var mlpBias: Bool = false +@Codable +public struct LlamaConfiguration: Sendable { + + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("head_dim") public var headDimensions: Int? + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int? 
+ @CodingKey("rope_theta") public var ropeTheta: Float = 10_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = true + @CodingKey("attention_bias") public var attentionBias: Bool = false + @CodingKey("mlp_bias") public var mlpBias: Bool = false public init( hiddenSize: Int, hiddenLayers: Int, intermediateSize: Int, attentionHeads: Int, @@ -382,62 +384,14 @@ public struct LlamaConfiguration: Codable, Sendable { headDimensions ?? (hiddenSize / attentionHeads) } - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case headDimensions = "head_dim" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case maxPositionEmbeddings = "max_position_embeddings" - case ropeTheta = "rope_theta" - case ropeTraditional = "rope_traditional" - case ropeScaling = "rope_scaling" - case tieWordEmbeddings = "tie_word_embeddings" - case attentionBias = "attention_bias" - case mlpBias = "mlp_bias" - } - - public init(from decoder: Swift.Decoder) throws { - let container = try decoder.container(keyedBy: CodingKeys.self) - - hiddenSize = try container.decode(Int.self, forKey: .hiddenSize) - hiddenLayers = try container.decode(Int.self, forKey: .hiddenLayers) - intermediateSize = try container.decode(Int.self, forKey: .intermediateSize) - attentionHeads = try container.decode(Int.self, forKey: .attentionHeads) - headDimensions = try container.decodeIfPresent(Int.self, forKey: .headDimensions) - rmsNormEps = try container.decode(Float.self, forKey: .rmsNormEps) - vocabularySize = try container.decode(Int.self, forKey: .vocabularySize) - kvHeads = try container.decodeIfPresent(Int.self, forKey: .kvHeads) ?? attentionHeads - maxPositionEmbeddings = try container.decodeIfPresent( - Int.self, forKey: .maxPositionEmbeddings) - if let ropeTheta = try container.decodeIfPresent(Float.self, forKey: .ropeTheta) { - self.ropeTheta = ropeTheta - } - if let ropeTraditional = try container.decodeIfPresent(Bool.self, forKey: .ropeTraditional) - { - self.ropeTraditional = ropeTraditional - } - ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: .ropeScaling) - if let tieWordEmbeddings = try container.decodeIfPresent( - Bool.self, forKey: .tieWordEmbeddings) - { - self.tieWordEmbeddings = tieWordEmbeddings - } - if let attentionBias = try container.decodeIfPresent(Bool.self, forKey: .attentionBias) { - self.attentionBias = attentionBias - } - if let mlpBias = try container.decodeIfPresent(Bool.self, forKey: .mlpBias) { - self.mlpBias = mlpBias - } + public func didDecode(from decoder: any Decoder) throws { + let container = try decoder.container(keyedBy: AnyCodingKey.self) + let codingKey = AnyCodingKey("rope_scaling") if let ropeScaling { if ropeScaling["factor"] == nil { throw DecodingError.dataCorruptedError( - forKey: .ropeScaling, in: container, + forKey: codingKey, in: container, debugDescription: "rope_scaling must contain 'factor'") } if let ropeType = ropeScaling["type"] ?? 
ropeScaling["rope_type"] { @@ -448,7 +402,7 @@ public struct LlamaConfiguration: Codable, Sendable { ] if !options.contains(ropeType) { throw DecodingError.dataCorruptedError( - forKey: .ropeScaling, in: container, + forKey: codingKey, in: container, debugDescription: "rope_scaling 'type' currently only supports 'linear', 'dynamic', or 'llama3'" ) @@ -456,7 +410,7 @@ public struct LlamaConfiguration: Codable, Sendable { } } else { throw DecodingError.dataCorruptedError( - forKey: .ropeScaling, in: container, + forKey: codingKey, in: container, debugDescription: "rope_scaling must contain either 'type' or 'rope_type'") } } diff --git a/Libraries/MLXLLM/Models/MiMo.swift b/Libraries/MLXLLM/Models/MiMo.swift index de67977a..67db6946 100644 --- a/Libraries/MLXLLM/Models/MiMo.swift +++ b/Libraries/MLXLLM/Models/MiMo.swift @@ -9,6 +9,7 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable private class Attention: Module { let args: MiMoConfiguration @@ -208,59 +209,21 @@ public class MiMoModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct MiMoConfiguration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var maxPositionEmbeddings: Int - var ropeTheta: Float - var ropeTraditional: Bool - var ropeScaling: [String: StringOrNumber]? - var tieWordEmbeddings: Bool - var numNextnPredictLayers: Int - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case maxPositionEmbeddings = "max_position_embeddings" - case ropeTheta = "rope_theta" - case ropeTraditional = "rope_traditional" - case ropeScaling = "rope_scaling" - case tieWordEmbeddings = "tie_word_embeddings" - case numNextnPredictLayers = "num_nextn_predict_layers" - } - - public init(from decoder: Decoder) throws { - let container = try decoder.container(keyedBy: CodingKeys.self) - - self.hiddenSize = try container.decode(Int.self, forKey: .hiddenSize) - self.hiddenLayers = try container.decode(Int.self, forKey: .hiddenLayers) - self.intermediateSize = try container.decode(Int.self, forKey: .intermediateSize) - self.attentionHeads = try container.decode(Int.self, forKey: .attentionHeads) - self.rmsNormEps = try container.decode(Float.self, forKey: .rmsNormEps) - self.vocabularySize = try container.decode(Int.self, forKey: .vocabularySize) - self.kvHeads = try container.decode(Int.self, forKey: .kvHeads) - self.maxPositionEmbeddings = - try container.decodeIfPresent(Int.self, forKey: .maxPositionEmbeddings) ?? 32768 - self.ropeTheta = try container.decodeIfPresent(Float.self, forKey: .ropeTheta) ?? 10000.0 - self.ropeTraditional = - try container.decodeIfPresent(Bool.self, forKey: .ropeTraditional) ?? false - self.ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: .ropeScaling) - self.tieWordEmbeddings = - try container.decodeIfPresent(Bool.self, forKey: .tieWordEmbeddings) ?? false - self.numNextnPredictLayers = - try container.decodeIfPresent(Int.self, forKey: .numNextnPredictLayers) ?? 
2 - } +@Codable +public struct MiMoConfiguration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 32768 + @CodingKey("rope_theta") public var ropeTheta: Float = 10000.0 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? = nil + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = false + @CodingKey("num_nextn_predict_layers") public var numNextnPredictLayers: Int = 2 } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/OpenELM.swift b/Libraries/MLXLLM/Models/OpenELM.swift index 1d67ac55..291d0814 100644 --- a/Libraries/MLXLLM/Models/OpenELM.swift +++ b/Libraries/MLXLLM/Models/OpenELM.swift @@ -9,6 +9,7 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable func computeHeads(modelDim: Int, headDim: Int) -> Int { assert(modelDim % headDim == 0, "modelDim must be divisible by headDim") @@ -205,58 +206,26 @@ public class OpenELMModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct OpenElmConfiguration: Codable, Sendable { - var modelType: String - var headDimensions: Int - var numTransformerLayers: Int - var modelDim: Int - var vocabularySize: Int - var ffnDimDivisor: Int - var numQueryHeads: [Int] = [] - var kvHeads: [Int] = [] - var ffnWithGlu: Bool = true - var normalizeQkProjections: Bool = true - var shareInputOutputLayers: Bool = true - var rmsNormEps: Float = 1e-6 - var ropeTheta: Float = 10_000 - var ropeTraditional: Bool = false - var numGqaGroups: Int = 4 - var ffnMultipliers: [Float] = [0.5, 4.0] - var qkvMultiplier: [Float] = [0.5, 1.0] - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case headDimensions = "head_dim" - case numTransformerLayers = "num_transformer_layers" - case modelDim = "model_dim" - case vocabularySize = "vocab_size" - case ffnDimDivisor = "ffn_dim_divisor" - case ffnMultipliers = "ffn_multipliers" - case ffnWithGlu = "ffn_with_glu" - case normalizeQkProjections = "normalize_qk_projections" - case shareInputOutputLayers = "share_input_output_layers" - } - - public init(from decoder: Decoder) throws { - // custom implementation to handle optional keys with required values - let container: KeyedDecodingContainer = - try decoder.container( - keyedBy: OpenElmConfiguration.CodingKeys.self) - - self.modelType = try container.decode( - String.self, forKey: OpenElmConfiguration.CodingKeys.modelType) - self.headDimensions = try container.decode( - Int.self, forKey: OpenElmConfiguration.CodingKeys.headDimensions) - self.numTransformerLayers = try container.decode( - Int.self, forKey: OpenElmConfiguration.CodingKeys.numTransformerLayers) - - self.modelDim = try container.decode( - Int.self, forKey: OpenElmConfiguration.CodingKeys.modelDim) - self.vocabularySize = try container.decode( - Int.self, forKey: OpenElmConfiguration.CodingKeys.vocabularySize) - self.ffnDimDivisor = try container.decode( - Int.self, forKey: OpenElmConfiguration.CodingKeys.ffnDimDivisor) - +@Codable +public struct OpenElmConfiguration: Sendable { 
+ @CodingKey("head_dim") public var headDimensions: Int + @CodingKey("num_transformer_layers") public var numTransformerLayers: Int + @CodingKey("model_dim") public var modelDim: Int + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("ffn_dim_divisor") public var ffnDimDivisor: Int + @CodingKey("ffn_multipliers") public var ffnMultipliers: [Float] = [0.5, 4.0] + @CodingKey("ffn_with_glu") public var ffnWithGlu: Bool = true + @CodingKey("normalize_qk_projections") public var normalizeQkProjections: Bool = true + @CodingKey("share_input_output_layers") public var shareInputOutputLayers: Bool = true + @CodingIgnored public var numQueryHeads: [Int] = [] + @CodingIgnored public var kvHeads: [Int] = [] + @CodingIgnored public var rmsNormEps: Float = 1e-6 + @CodingIgnored public var ropeTheta: Float = 10_000 + @CodingIgnored public var ropeTraditional: Bool = false + @CodingIgnored public var numGqaGroups: Int = 4 + @CodingIgnored public var qkvMultiplier: [Float] = [0.5, 1.0] + + public mutating func didDecode(from decoder: any Decoder) throws { let qkvMultipliers = stride( from: qkvMultiplier[0], through: qkvMultiplier[1], by: (qkvMultiplier[1] - qkvMultiplier[0]) / Float(numTransformerLayers - 1) @@ -281,16 +250,6 @@ public struct OpenElmConfiguration: Codable, Sendable { by: (ffnMultipliers[1] - ffnMultipliers[0]) / Float(numTransformerLayers - 1) ) .map { round($0 * 100) / 100 } - - self.ffnWithGlu = - try container.decodeIfPresent( - Bool.self, forKey: OpenElmConfiguration.CodingKeys.ffnWithGlu) ?? true - self.normalizeQkProjections = - try container.decodeIfPresent( - Bool.self, forKey: OpenElmConfiguration.CodingKeys.normalizeQkProjections) ?? true - self.shareInputOutputLayers = - try container.decodeIfPresent( - Bool.self, forKey: OpenElmConfiguration.CodingKeys.shareInputOutputLayers) ?? 
true } } diff --git a/Libraries/MLXLLM/Models/Phi.swift b/Libraries/MLXLLM/Models/Phi.swift index a8aa8d97..43a8b897 100644 --- a/Libraries/MLXLLM/Models/Phi.swift +++ b/Libraries/MLXLLM/Models/Phi.swift @@ -4,8 +4,9 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable -// https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/phi.py +// https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/phi.py private class PhiAttention: Module { @@ -175,59 +176,18 @@ public class PhiModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct PhiConfiguration: Codable, Sendable { - var maxPositionalEmbeddings = 2048 - var vocabularySize = 51200 - var hiddenSize = 2560 - var attentionHeads = 32 - var hiddenLayers = 32 - var kvHeads = 32 - var partialRotaryFactor: Float = 0.4 - var intermediateSize = 10240 - var layerNormEps: Float = 1e-5 - var ropeTheta: Float = 10_000 - - enum CodingKeys: String, CodingKey { - case maxPositionalEmbeddings = "max_position_embeddings" - case vocabularySize = "vocab_size" - case hiddenSize = "hidden_size" - case attentionHeads = "num_attention_heads" - case hiddenLayers = "num_hidden_layers" - case kvHeads = "num_key_value_heads" - case partialRotaryFactor = "partial_rotary_factor" - case intermediateSize = "intermediate_size" - case layerNormEps = "layer_norm_eps" - case ropeTheta = "rope_theta" - } - - public init(from decoder: Decoder) throws { - let container: KeyedDecodingContainer = try decoder.container( - keyedBy: PhiConfiguration.CodingKeys.self) - - self.maxPositionalEmbeddings = try container.decode( - Int.self, forKey: PhiConfiguration.CodingKeys.maxPositionalEmbeddings) - self.vocabularySize = try container.decode( - Int.self, forKey: PhiConfiguration.CodingKeys.vocabularySize) - self.hiddenSize = try container.decode( - Int.self, forKey: PhiConfiguration.CodingKeys.hiddenSize) - self.attentionHeads = try container.decode( - Int.self, forKey: PhiConfiguration.CodingKeys.attentionHeads) - self.hiddenLayers = try container.decode( - Int.self, forKey: PhiConfiguration.CodingKeys.hiddenLayers) - self.kvHeads = - try container.decodeIfPresent(Int.self, forKey: PhiConfiguration.CodingKeys.kvHeads) - ?? attentionHeads - self.partialRotaryFactor = try container.decode( - Float.self, forKey: PhiConfiguration.CodingKeys.partialRotaryFactor) - self.intermediateSize = try container.decode( - Int.self, forKey: PhiConfiguration.CodingKeys.intermediateSize) - self.layerNormEps = try container.decode( - Float.self, forKey: PhiConfiguration.CodingKeys.layerNormEps) - self.ropeTheta = - try container.decodeIfPresent(Float.self, forKey: PhiConfiguration.CodingKeys.ropeTheta) - ?? 
10_000 - - } +@Codable +public struct PhiConfiguration: Sendable { + @CodingKey("max_position_embeddings") public var maxPositionalEmbeddings = 2048 + @CodingKey("vocab_size") public var vocabularySize = 51200 + @CodingKey("hidden_size") public var hiddenSize = 2560 + @CodingKey("num_attention_heads") public var attentionHeads = 32 + @CodingKey("num_hidden_layers") public var hiddenLayers = 32 + @CodingKey("num_key_value_heads") public var kvHeads = 32 + @CodingKey("partial_rotary_factor") public var partialRotaryFactor: Float = 0.4 + @CodingKey("intermediate_size") public var intermediateSize = 10240 + @CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-5 + @CodingKey("rope_theta") public var ropeTheta: Float = 10_000 } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Phi3.swift b/Libraries/MLXLLM/Models/Phi3.swift index b5b153b9..b0f04c56 100644 --- a/Libraries/MLXLLM/Models/Phi3.swift +++ b/Libraries/MLXLLM/Models/Phi3.swift @@ -4,6 +4,7 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable private class Attention: Module { @@ -225,13 +226,14 @@ public class Phi3Model: Module, LLMModel, KVCacheDimensionProvider { } } -struct RopeScalingWithFactorArrays: Codable { - let longFactor: [Float]? - let shortFactor: [Float]? - let factor: Float? - let type: String? - let longMScale: Float? - let shortMScale: Float? +@Codable +public struct RopeScalingWithFactorArrays: Sendable { + @CodingKey("long_factor") public var longFactor: [Float]? + @CodingKey("short_factor") public var shortFactor: [Float]? + @CodingKey("long_mscale") public var longMScale: Float? + @CodingKey("short_mscale") public var shortMScale: Float? + public var factor: Float? + public var type: String? enum CodingKeys: String, CodingKey { case type @@ -243,74 +245,22 @@ struct RopeScalingWithFactorArrays: Codable { } } -public struct Phi3Configuration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var ropeTheta: Float = 10_000 - var ropeTraditional: Bool = false - var ropeScaling: RopeScalingWithFactorArrays? 
- var partialRotaryFactor: Float = 1.0 - var maxPositionEmbeddings: Int - var originalMaxPositionEmbeddings: Int - var tieWordEmbeddings: Bool = false - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case ropeTheta = "rope_theta" - case ropeTraditional = "rope_traditional" - case ropeScaling = "rope_scaling" - case partialRotaryFactor = "partial_rotary_factor" - case maxPositionEmbeddings = "max_position_embeddings" - case originalMaxPositionEmbeddings = "original_max_position_embeddings" - case tieWordEmbeddings = "tie_word_embeddings" - } - - public init(from decoder: Decoder) throws { - // custom implementation to handle optional keys with required values - let container: KeyedDecodingContainer = try decoder.container( - keyedBy: Phi3Configuration.CodingKeys.self) - - hiddenSize = try container.decode(Int.self, forKey: Phi3Configuration.CodingKeys.hiddenSize) - hiddenLayers = try container.decode( - Int.self, forKey: Phi3Configuration.CodingKeys.hiddenLayers) - intermediateSize = try container.decode( - Int.self, forKey: Phi3Configuration.CodingKeys.intermediateSize) - attentionHeads = try container.decode( - Int.self, forKey: Phi3Configuration.CodingKeys.attentionHeads) - rmsNormEps = try container.decode( - Float.self, forKey: Phi3Configuration.CodingKeys.rmsNormEps) - vocabularySize = try container.decode( - Int.self, forKey: Phi3Configuration.CodingKeys.vocabularySize) - kvHeads = try container.decode(Int.self, forKey: Phi3Configuration.CodingKeys.kvHeads) - ropeTheta = - try container.decodeIfPresent( - Float.self, forKey: Phi3Configuration.CodingKeys.ropeTheta) ?? 10_000 - ropeTraditional = - try container.decodeIfPresent( - Bool.self, forKey: Phi3Configuration.CodingKeys.ropeTraditional) ?? false - ropeScaling = try container.decodeIfPresent( - RopeScalingWithFactorArrays.self, forKey: .ropeScaling) - partialRotaryFactor = - try container.decodeIfPresent( - Float.self, forKey: .partialRotaryFactor) ?? 1.0 - maxPositionEmbeddings = try container.decode(Int.self, forKey: .maxPositionEmbeddings) - originalMaxPositionEmbeddings = try container.decode( - Int.self, forKey: .originalMaxPositionEmbeddings) - tieWordEmbeddings = - try container.decodeIfPresent( - Bool.self, forKey: .tieWordEmbeddings) ?? false - } +@Codable +public struct Phi3Configuration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("rope_theta") public var ropeTheta: Float = 10_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: RopeScalingWithFactorArrays? 
+ @CodingKey("partial_rotary_factor") public var partialRotaryFactor: Float = 1.0 + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int + @CodingKey("original_max_position_embeddings") public var originalMaxPositionEmbeddings: Int + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = false } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/PhiMoE.swift b/Libraries/MLXLLM/Models/PhiMoE.swift index 2330f3b7..98dbb47f 100644 --- a/Libraries/MLXLLM/Models/PhiMoE.swift +++ b/Libraries/MLXLLM/Models/PhiMoE.swift @@ -2,41 +2,26 @@ import Foundation import MLX import MLXLMCommon import MLXNN - -// Port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/phimoe.py - -public struct PhiMoEConfiguration: Codable, Sendable { - var modelType: String = "phimoe" - var vocabularySize: Int = 32064 - var hiddenSize: Int = 4096 - var intermediateSize: Int = 6400 - var hiddenLayers: Int = 32 - var attentionHeads: Int = 32 - var kvHeads: Int = 8 - var maxPositionEmbeddings: Int = 131072 - var originalMaxPositionEmbeddings: Int = 4096 - var rmsNormEps: Float = 1e-6 - var ropeScaling: RopeScalingWithFactorArrays? - var numLocalExperts: Int = 16 - var numExpertsPerToken: Int = 2 - var ropeTheta: Float = 10000.0 - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case vocabularySize = "vocab_size" - case hiddenSize = "hidden_size" - case intermediateSize = "intermediate_size" - case hiddenLayers = "num_hidden_layers" - case attentionHeads = "num_attention_heads" - case kvHeads = "num_key_value_heads" - case maxPositionEmbeddings = "max_position_embeddings" - case originalMaxPositionEmbeddings = "original_max_position_embeddings" - case rmsNormEps = "rms_norm_eps" - case ropeScaling = "rope_scaling" - case numLocalExperts = "num_local_experts" - case numExpertsPerToken = "num_experts_per_tok" - case ropeTheta = "rope_theta" - } +import ReerCodable + +// Port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/phimoe.py + +@Codable +public struct PhiMoEConfiguration: Sendable { + @CodingKey("vocab_size") public var vocabularySize: Int = 32064 + @CodingKey("hidden_size") public var hiddenSize: Int = 4096 + @CodingKey("intermediate_size") public var intermediateSize: Int = 6400 + @CodingKey("num_hidden_layers") public var hiddenLayers: Int = 32 + @CodingKey("num_attention_heads") public var attentionHeads: Int = 32 + @CodingKey("num_key_value_heads") public var kvHeads: Int = 8 + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 131072 + @CodingKey("original_max_position_embeddings") public var originalMaxPositionEmbeddings: Int = + 4096 + @CodingKey("rms_norm_eps") public var rmsNormEps: Float = 1e-6 + @CodingKey("rope_scaling") public var ropeScaling: RopeScalingWithFactorArrays? 
+ @CodingKey("num_local_experts") public var numLocalExperts: Int = 16 + @CodingKey("num_experts_per_tok") public var numExpertsPerToken: Int = 2 + @CodingKey("rope_theta") public var ropeTheta: Float = 10000.0 } private class Attention: Module { diff --git a/Libraries/MLXLLM/Models/Qwen2.swift b/Libraries/MLXLLM/Models/Qwen2.swift index d2f64432..afbe4758 100644 --- a/Libraries/MLXLLM/Models/Qwen2.swift +++ b/Libraries/MLXLLM/Models/Qwen2.swift @@ -9,8 +9,9 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable -// port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/qwen2.py +// port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/qwen2.py private class Attention: Module { let args: Qwen2Configuration @@ -208,64 +209,19 @@ public class Qwen2Model: Module, LLMModel, KVCacheDimensionProvider { } } -public struct Qwen2Configuration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var ropeTheta: Float = 1_000_000 - var ropeTraditional: Bool = false - var ropeScaling: [String: StringOrNumber]? = nil - var tieWordEmbeddings = false - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case ropeTheta = "rope_theta" - case ropeTraditional = "rope_traditional" - case ropeScaling = "rope_scaling" - case tieWordEmbeddings = "tie_word_embeddings" - } - - public init(from decoder: Decoder) throws { - // custom implementation to handle optional keys with required values - let container: KeyedDecodingContainer = - try decoder.container( - keyedBy: Qwen2Configuration.CodingKeys.self) - - self.hiddenSize = try container.decode( - Int.self, forKey: Qwen2Configuration.CodingKeys.hiddenSize) - self.hiddenLayers = try container.decode( - Int.self, forKey: Qwen2Configuration.CodingKeys.hiddenLayers) - self.intermediateSize = try container.decode( - Int.self, forKey: Qwen2Configuration.CodingKeys.intermediateSize) - self.attentionHeads = try container.decode( - Int.self, forKey: Qwen2Configuration.CodingKeys.attentionHeads) - self.rmsNormEps = try container.decode( - Float.self, forKey: Qwen2Configuration.CodingKeys.rmsNormEps) - self.vocabularySize = try container.decode( - Int.self, forKey: Qwen2Configuration.CodingKeys.vocabularySize) - self.kvHeads = try container.decode(Int.self, forKey: Qwen2Configuration.CodingKeys.kvHeads) - self.ropeTheta = - try container.decodeIfPresent( - Float.self, forKey: Qwen2Configuration.CodingKeys.ropeTheta) - ?? 1_000_000 - self.ropeTraditional = - try container.decodeIfPresent( - Bool.self, forKey: Qwen2Configuration.CodingKeys.ropeTraditional) ?? false - self.ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: Qwen2Configuration.CodingKeys.ropeScaling) - self.tieWordEmbeddings = - try container.decodeIfPresent(Bool.self, forKey: .tieWordEmbeddings) ?? 
false - } +@Codable +public struct Qwen2Configuration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("rope_theta") public var ropeTheta: Float = 1_000_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? = nil + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings = false } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Qwen3.swift b/Libraries/MLXLLM/Models/Qwen3.swift index 6e9e8bb9..e65fb844 100644 --- a/Libraries/MLXLLM/Models/Qwen3.swift +++ b/Libraries/MLXLLM/Models/Qwen3.swift @@ -9,6 +9,7 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable // port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/qwen3.py @@ -217,67 +218,20 @@ public class Qwen3Model: Module, LLMModel, KVCacheDimensionProvider { } } -public struct Qwen3Configuration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var ropeTheta: Float = 1_000_000 - var headDim: Int - var ropeScaling: [String: StringOrNumber]? = nil - var tieWordEmbeddings = false - var maxPositionEmbeddings: Int = 32768 - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case ropeTheta = "rope_theta" - case headDim = "head_dim" - case ropeScaling = "rope_scaling" - case tieWordEmbeddings = "tie_word_embeddings" - case maxPositionEmbeddings = "max_position_embeddings" - } - - public init(from decoder: Decoder) throws { - // custom implementation to handle optional keys with required values - let container: KeyedDecodingContainer = - try decoder.container( - keyedBy: Qwen3Configuration.CodingKeys.self) - - self.hiddenSize = try container.decode( - Int.self, forKey: Qwen3Configuration.CodingKeys.hiddenSize) - self.hiddenLayers = try container.decode( - Int.self, forKey: Qwen3Configuration.CodingKeys.hiddenLayers) - self.intermediateSize = try container.decode( - Int.self, forKey: Qwen3Configuration.CodingKeys.intermediateSize) - self.attentionHeads = try container.decode( - Int.self, forKey: Qwen3Configuration.CodingKeys.attentionHeads) - self.rmsNormEps = try container.decode( - Float.self, forKey: Qwen3Configuration.CodingKeys.rmsNormEps) - self.vocabularySize = try container.decode( - Int.self, forKey: Qwen3Configuration.CodingKeys.vocabularySize) - self.kvHeads = try container.decode(Int.self, forKey: Qwen3Configuration.CodingKeys.kvHeads) - self.ropeTheta = - try container.decodeIfPresent( - Float.self, forKey: Qwen3Configuration.CodingKeys.ropeTheta) - ?? 
1_000_000 - self.headDim = try container.decode( - Int.self, forKey: Qwen3Configuration.CodingKeys.headDim) - self.ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: Qwen3Configuration.CodingKeys.ropeScaling) - self.tieWordEmbeddings = - try container.decodeIfPresent(Bool.self, forKey: .tieWordEmbeddings) ?? false - self.maxPositionEmbeddings = - try container.decodeIfPresent(Int.self, forKey: .maxPositionEmbeddings) ?? 32768 - } +@Codable +public struct Qwen3Configuration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("rope_theta") public var ropeTheta: Float = 1_000_000 + @CodingKey("head_dim") public var headDim: Int + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? = nil + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings = false + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 32768 } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Qwen3MoE.swift b/Libraries/MLXLLM/Models/Qwen3MoE.swift index 301b2c9f..a15b2184 100644 --- a/Libraries/MLXLLM/Models/Qwen3MoE.swift +++ b/Libraries/MLXLLM/Models/Qwen3MoE.swift @@ -9,6 +9,7 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable // port of https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/models/qwen3_moe.py @@ -278,76 +279,26 @@ public class Qwen3MoEModel: Module, LLMModel, KVCacheDimensionProvider { } } -public struct Qwen3MoEConfiguration: Codable, Sendable { - var modelType: String = "qwen3_moe" - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var numExperts: Int - var numExpertsPerToken: Int - var decoderSparseStep: Int - var mlpOnlyLayers: [Int] - var moeIntermediateSize: Int - var rmsNormEps: Float - var vocabularySize: Int - var kvHeads: Int - var headDim: Int - var ropeTheta: Float = 1_000_000 - var tieWordEmbeddings: Bool = false - var maxPositionEmbeddings: Int = 32768 - var normTopkProb: Bool = false - var ropeScaling: [String: StringOrNumber]? = nil - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case numExperts = "num_experts" - case numExpertsPerToken = "num_experts_per_tok" - case decoderSparseStep = "decoder_sparse_step" - case mlpOnlyLayers = "mlp_only_layers" - case moeIntermediateSize = "moe_intermediate_size" - case rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case headDim = "head_dim" - case ropeTheta = "rope_theta" - case tieWordEmbeddings = "tie_word_embeddings" - case maxPositionEmbeddings = "max_position_embeddings" - case normTopkProb = "norm_topk_prob" - case ropeScaling = "rope_scaling" - } - - public init(from decoder: Decoder) throws { - let container = try decoder.container(keyedBy: CodingKeys.self) - - self.modelType = - try container.decodeIfPresent(String.self, forKey: .modelType) ?? 
"qwen3_moe" - self.hiddenSize = try container.decode(Int.self, forKey: .hiddenSize) - self.hiddenLayers = try container.decode(Int.self, forKey: .hiddenLayers) - self.intermediateSize = try container.decode(Int.self, forKey: .intermediateSize) - self.attentionHeads = try container.decode(Int.self, forKey: .attentionHeads) - self.numExperts = try container.decode(Int.self, forKey: .numExperts) - self.numExpertsPerToken = try container.decode(Int.self, forKey: .numExpertsPerToken) - self.decoderSparseStep = try container.decode(Int.self, forKey: .decoderSparseStep) - self.mlpOnlyLayers = try container.decode([Int].self, forKey: .mlpOnlyLayers) - self.moeIntermediateSize = try container.decode(Int.self, forKey: .moeIntermediateSize) - self.rmsNormEps = try container.decode(Float.self, forKey: .rmsNormEps) - self.vocabularySize = try container.decode(Int.self, forKey: .vocabularySize) - self.kvHeads = try container.decode(Int.self, forKey: .kvHeads) - self.headDim = try container.decode(Int.self, forKey: .headDim) - self.ropeTheta = try container.decodeIfPresent(Float.self, forKey: .ropeTheta) ?? 1_000_000 - self.tieWordEmbeddings = - try container.decodeIfPresent(Bool.self, forKey: .tieWordEmbeddings) ?? false - self.maxPositionEmbeddings = - try container.decodeIfPresent(Int.self, forKey: .maxPositionEmbeddings) ?? 32768 - self.normTopkProb = try container.decodeIfPresent(Bool.self, forKey: .normTopkProb) ?? false - self.ropeScaling = try container.decodeIfPresent( - [String: StringOrNumber].self, forKey: .ropeScaling) - } +@Codable +public struct Qwen3MoEConfiguration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("num_experts") public var numExperts: Int + @CodingKey("num_experts_per_tok") public var numExpertsPerToken: Int + @CodingKey("decoder_sparse_step") public var decoderSparseStep: Int + @CodingKey("mlp_only_layers") public var mlpOnlyLayers: [Int] + @CodingKey("moe_intermediate_size") public var moeIntermediateSize: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("head_dim") public var headDim: Int + @CodingKey("rope_theta") public var ropeTheta: Float = 1_000_000 + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = false + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 32768 + @CodingKey("norm_topk_prob") public var normTopkProb: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? 
= nil } // MARK: - LoRA diff --git a/Libraries/MLXLLM/Models/Starcoder2.swift b/Libraries/MLXLLM/Models/Starcoder2.swift index 9dcc09af..a48e9923 100644 --- a/Libraries/MLXLLM/Models/Starcoder2.swift +++ b/Libraries/MLXLLM/Models/Starcoder2.swift @@ -9,8 +9,9 @@ import Foundation import MLX import MLXLMCommon import MLXNN +import ReerCodable -// port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/starcoder2.py +// port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/starcoder2.py private class Attention: Module { let args: Starcoder2Configuration @@ -178,70 +179,19 @@ public class Starcoder2Model: Module, LLMModel, KVCacheDimensionProvider { } } -public struct Starcoder2Configuration: Codable, Sendable { - var hiddenSize: Int - var hiddenLayers: Int - var intermediateSize: Int - var attentionHeads: Int - var kvHeads: Int - var maxPositionEmbeddings: Int = 16384 - var normEpsilon: Float = 1e-5 - var normType: String = "layer_norm" - var vocabularySize: Int = 49152 - var ropeTheta: Float = 100000 - var tieWordEmbeddings: Bool = true - - enum CodingKeys: String, CodingKey { - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case kvHeads = "num_key_value_heads" - case maxPositionEmbeddings = "max_position_embeddings" - case normEpsilon = "norm_epsilon" - case normType = "norm_type" - case vocabularySize = "vocab_size" - case ropeTheta = "rope_theta" - case tieWordEmbeddings = "tie_word_embeddings" - } - - public init(from decoder: Decoder) throws { - // custom implementation to handle optional keys with required values - let container: KeyedDecodingContainer = - try decoder.container( - keyedBy: Starcoder2Configuration.CodingKeys.self) - - self.hiddenSize = try container.decode( - Int.self, forKey: Starcoder2Configuration.CodingKeys.hiddenSize) - self.hiddenLayers = try container.decode( - Int.self, forKey: Starcoder2Configuration.CodingKeys.hiddenLayers) - self.intermediateSize = try container.decode( - Int.self, forKey: Starcoder2Configuration.CodingKeys.intermediateSize) - self.attentionHeads = try container.decode( - Int.self, forKey: Starcoder2Configuration.CodingKeys.attentionHeads) - self.kvHeads = try container.decode( - Int.self, forKey: Starcoder2Configuration.CodingKeys.kvHeads) - self.maxPositionEmbeddings = - try container.decodeIfPresent( - Int.self, forKey: Starcoder2Configuration.CodingKeys.maxPositionEmbeddings) ?? 16384 - self.normEpsilon = - try container.decodeIfPresent( - Float.self, forKey: Starcoder2Configuration.CodingKeys.normEpsilon) ?? 1e-5 - self.normType = - try container.decodeIfPresent( - String.self, forKey: Starcoder2Configuration.CodingKeys.normType) ?? "layer_norm" - self.vocabularySize = - try container.decodeIfPresent( - Int.self, forKey: Starcoder2Configuration.CodingKeys.vocabularySize) ?? 49152 - self.ropeTheta = - try container.decodeIfPresent( - Float.self, forKey: Starcoder2Configuration.CodingKeys.ropeTheta) - ?? 100000 - self.tieWordEmbeddings = - try container.decodeIfPresent( - Bool.self, forKey: Starcoder2Configuration.CodingKeys.tieWordEmbeddings) - ?? 
true - } +@Codable +public struct Starcoder2Configuration: Sendable { + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 16384 + @CodingKey("norm_epsilon") public var normEpsilon: Float = 1e-5 + @CodingKey("norm_type") public var normType: String = "layer_norm" + @CodingKey("vocab_size") public var vocabularySize: Int = 49152 + @CodingKey("rope_theta") public var ropeTheta: Float = 100000 + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = true } // MARK: - LoRA diff --git a/Libraries/MLXLLM/README.md b/Libraries/MLXLLM/README.md index 16540fe6..fbb71984 100644 --- a/Libraries/MLXLLM/README.md +++ b/Libraries/MLXLLM/README.md @@ -11,7 +11,7 @@ This is a port of several models from: -- https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/ +- https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/ using the Hugging Face swift transformers package to provide tokenization: diff --git a/Libraries/MLXLLM/SwitchLayers.swift b/Libraries/MLXLLM/SwitchLayers.swift index c74b5987..d431bfe9 100644 --- a/Libraries/MLXLLM/SwitchLayers.swift +++ b/Libraries/MLXLLM/SwitchLayers.swift @@ -2,7 +2,7 @@ import Foundation import MLX import MLXNN -// Port of https://github.com/ml-explore/mlx-examples/blob/main/llms/mlx_lm/models/switch_layers.py +// Port of https://github.com/ml-explore/mlx-lm/tree/main/mlx_lm/models/switch_layers.py private func gatherSort(x: MLXArray, indices: MLXArray) -> (MLXArray, MLXArray, MLXArray) { let m = indices.dim(-1) diff --git a/Libraries/MLXLMCommon/BaseConfiguration.swift b/Libraries/MLXLMCommon/BaseConfiguration.swift index d90784ec..80289d28 100644 --- a/Libraries/MLXLMCommon/BaseConfiguration.swift +++ b/Libraries/MLXLMCommon/BaseConfiguration.swift @@ -1,30 +1,23 @@ // Copyright © 2025 Apple Inc. import Foundation +import ReerCodable /// Base ``LanguageModel`` configuration -- provides `modelType` /// and `quantization` (used in loading the model). /// /// This is used by ``ModelFactory/load(hub:configuration:progressHandler:)`` /// to determine the type of model to load. -public struct BaseConfiguration: Codable, Sendable { - public let modelType: String +@Codable(memberwiseInit: false) +public struct BaseConfiguration: Sendable { + @CodingKey("model_type") public let modelType: String - public struct Quantization: Codable, Sendable, Equatable { - public init(groupSize: Int, bits: Int) { - self.groupSize = groupSize - self.bits = bits - } - - public let groupSize: Int + @Codable + public struct Quantization: Sendable, Equatable { + @CodingKey("group_size") public let groupSize: Int public let bits: Int public var asTuple: (Int, Int) { (groupSize, bits) } - - enum CodingKeys: String, CodingKey { - case groupSize = "group_size" - case bits = "bits" - } } /// handling instructions for ``PerLayerQuantization`` @@ -77,36 +70,25 @@ public struct BaseConfiguration: Codable, Sendable { /// /// This mixed type structure requires manual decoding. struct QuantizationContainer: Codable, Sendable { - var quantization: Quantization + var quantization: Quantization? 
var perLayerQuantization: PerLayerQuantization - // based on Dictionary's coding key - internal struct _DictionaryCodingKey: CodingKey { - internal let stringValue: String - internal let intValue: Int? - - internal init(stringValue: String) { - self.stringValue = stringValue - self.intValue = Int(stringValue) - } - - internal init(intValue: Int) { - self.stringValue = "\(intValue)" - self.intValue = intValue - } + internal init(quantization: Quantization?, perLayerQuantization: PerLayerQuantization) { + self.quantization = quantization + self.perLayerQuantization = perLayerQuantization } init(from decoder: any Decoder) throws { // handle the embedded Quantization - self.quantization = try Quantization(from: decoder) + self.quantization = try? Quantization(from: decoder) // and the interleaved per-layer values var perLayerQuantization = [String: QuantizationOption]() - let container = try decoder.container(keyedBy: _DictionaryCodingKey.self) + let container = try decoder.container(keyedBy: AnyCodingKey.self) for key in container.allKeys { switch key.stringValue { - case Quantization.CodingKeys.groupSize.rawValue: continue - case Quantization.CodingKeys.bits.rawValue: continue + case "group_size": continue + case "bits": continue default: if let f = try? container.decode(Bool.self, forKey: key) { @@ -126,19 +108,20 @@ public struct BaseConfiguration: Codable, Sendable { func encode(to encoder: any Encoder) throws { try quantization.encode(to: encoder) - var container = encoder.container(keyedBy: _DictionaryCodingKey.self) + var container = encoder.container(keyedBy: AnyCodingKey.self) for (key, value) in perLayerQuantization.perLayerQuantization { + guard let key = AnyCodingKey(stringValue: key) else { continue } switch value { case .skip: - try container.encode(false, forKey: .init(stringValue: key)) + try container.encode(false, forKey: key) case .quantize(let q): - try container.encode(q, forKey: .init(stringValue: key)) + try container.encode(q, forKey: key) } } } } - var quantizationContainer: QuantizationContainer? + @CodingKey("quantization") var quantizationContainer: QuantizationContainer? @available(*, deprecated, message: "Please use perLayerQuantization instead") public var quantization: Quantization? { @@ -149,8 +132,13 @@ public struct BaseConfiguration: Codable, Sendable { quantizationContainer?.perLayerQuantization } - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case quantizationContainer = "quantization" + public init( + modelType: String, quantization: Quantization? = nil, + perLayerQuantization: PerLayerQuantization? = nil + ) { + self.modelType = modelType + self.quantizationContainer = QuantizationContainer( + quantization: quantization, + perLayerQuantization: perLayerQuantization ?? .init(perLayerQuantization: [:])) } } diff --git a/Libraries/MLXVLM/Codable+Support.swift b/Libraries/MLXVLM/Codable+Support.swift new file mode 100644 index 00000000..84329926 --- /dev/null +++ b/Libraries/MLXVLM/Codable+Support.swift @@ -0,0 +1,5 @@ +import Foundation + +/// `swift-transformers` also declares a public `Decoder` and it conflicts with the `Codable` +/// implementations. 
+public typealias Decoder = Swift.Decoder diff --git a/Libraries/MLXVLM/Models/Idefics3.swift b/Libraries/MLXVLM/Models/Idefics3.swift index 17b1c5f4..642ac866 100644 --- a/Libraries/MLXVLM/Models/Idefics3.swift +++ b/Libraries/MLXVLM/Models/Idefics3.swift @@ -11,110 +11,50 @@ import Hub import MLX import MLXLMCommon import MLXNN +import ReerCodable import Tokenizers // MARK: - Configuration -public struct Idefics3Configuration: Codable, Sendable { - - public struct TextConfiguration: Codable, Sendable { - public let modelType: String - public let hiddenSize: Int - public var numHiddenLayers: Int { _numHiddenLayers ?? 32 } - public let intermediateSize: Int - public let numAttentionHeads: Int - public let rmsNormEps: Float - public let vocabSize: Int - public let numKeyValueHeads: Int - public let ropeTheta: Float - public var ropeTraditional: Bool { _ropeTraditional ?? false } - public var tieWordEmbeddings: Bool { _tieWordEmbeddings ?? false } - - private let _numHiddenLayers: Int? - private let _ropeTraditional: Bool? - private let _tieWordEmbeddings: Bool? - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case hiddenSize = "hidden_size" - case _numHiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case numAttentionHeads = "num_attention_heads" - case rmsNormEps = "rms_norm_eps" - case vocabSize = "vocab_size" - case numKeyValueHeads = "num_key_value_heads" - case ropeTheta = "rope_theta" - case _ropeTraditional = "rope_traditional" - case _tieWordEmbeddings = "tie_word_embeddings" - } - } - - public struct VisionConfiguration: Codable, Sendable { - public let modelType: String - public var numHiddenLayers: Int { _numHiddenLayers ?? 12 } - public let hiddenSize: Int - public var intermediateSize: Int { _intermediateSize ?? 3072 } - public let numAttentionHeads: Int - public let patchSize: Int - public let imageSize: Int - public var numChannels: Int { _numChannels ?? 3 } - public var layerNormEps: Float { _layerNormEps ?? 1e-6 } - - private let _numHiddenLayers: Int? - private let _intermediateSize: Int? - private let _numChannels: Int? - private let _layerNormEps: Float? 
- - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case _numHiddenLayers = "num_hidden_layers" - case hiddenSize = "hidden_size" - case _intermediateSize = "intermediate_size" - case numAttentionHeads = "num_attention_heads" - case patchSize = "patch_size" - case imageSize = "image_size" - case _numChannels = "num_channels" - case _layerNormEps = "layer_norm_eps" - } - } - - public let textConfig: TextConfiguration - public let visionConfig: VisionConfiguration - public let modelType: String - public let ignoreIndex: Int - public let vocabSize: Int - public let scaleFactor: Int - public let imageTokenId: Int - public let imageTokenIndex: Int - - enum CodingKeys: String, CodingKey { - case textConfig = "text_config" - case visionConfig = "vision_config" - case modelType = "model_type" - case ignoreIndex = "ignore_index" - case vocabSize = "vocab_size" - case scaleFactor = "scale_factor" - case imageTokenId = "image_token_id" - case imageTokenIndex = "image_token_index" - } - - public init(from decoder: any Swift.Decoder) throws { - let container = try decoder.container(keyedBy: CodingKeys.self) - - self.textConfig = - try container - .decode(TextConfiguration.self, forKey: .textConfig) - self.visionConfig = - try container - .decode(VisionConfiguration.self, forKey: .visionConfig) - self.modelType = try container.decode(String.self, forKey: .modelType) - self.ignoreIndex = (try? container.decode(Int.self, forKey: .ignoreIndex)) ?? -100 - self.vocabSize = (try? container.decode(Int.self, forKey: .vocabSize)) ?? 128259 - self.scaleFactor = (try? container.decode(Int.self, forKey: .scaleFactor)) ?? 2 - self.imageTokenId = (try? container.decode(Int.self, forKey: .imageTokenId)) ?? 49153 - self.imageTokenIndex = - (try? container.decode(Int.self, forKey: .imageTokenIndex)) ?? 
self.imageTokenId
-    }
+@Codable
+public struct Idefics3Configuration: Sendable {
+
+    @Codable
+    public struct TextConfiguration: Sendable {
+        @CodingKey("model_type") public var modelType: String
+        @CodingKey("hidden_size") public var hiddenSize: Int
+        @CodingKey("num_hidden_layers") public var numHiddenLayers: Int = 32
+        @CodingKey("intermediate_size") public var intermediateSize: Int
+        @CodingKey("num_attention_heads") public var numAttentionHeads: Int
+        @CodingKey("rms_norm_eps") public var rmsNormEps: Float
+        @CodingKey("vocab_size") public var vocabSize: Int
+        @CodingKey("num_key_value_heads") public var numKeyValueHeads: Int
+        @CodingKey("rope_theta") public var ropeTheta: Float
+        @CodingKey("rope_traditional") public var ropeTraditional: Bool = false
+        @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = false
+    }
+
+    @Codable
+    public struct VisionConfiguration: Sendable {
+        @CodingKey("model_type") public var modelType: String
+        @CodingKey("num_hidden_layers") public var numHiddenLayers: Int = 12
+        @CodingKey("hidden_size") public var hiddenSize: Int
+        @CodingKey("intermediate_size") public var intermediateSize: Int = 3072
+        @CodingKey("num_attention_heads") public var numAttentionHeads: Int
+        @CodingKey("patch_size") public var patchSize: Int
+        @CodingKey("image_size") public var imageSize: Int
+        @CodingKey("num_channels") public var numChannels: Int = 3
+        @CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-6
+    }
+
+    @CodingKey("text_config") public var textConfig: TextConfiguration
+    @CodingKey("vision_config") public var visionConfig: VisionConfiguration
+    @CodingKey("model_type") public var modelType: String
+    @CodingKey("ignore_index") public var ignoreIndex: Int = -100
+    @CodingKey("vocab_size") public var vocabSize: Int = 128259
+    @CodingKey("scale_factor") public var scaleFactor: Int = 2
+    @CodingKey("image_token_id") public var imageTokenId: Int = 49153
+    @CodingKey("image_token_index", "image_token_id") public var imageTokenIndex: Int
 }
 
 // MARK: - Connector
@@ -772,18 +712,18 @@ public class Idefics3: Module, VLMModel, KVCacheDimensionProvider {
 }
 
 // MARK: - Processor Configuration
-public struct Idefics3ProcessorConfiguration: Codable, Sendable {
-    public struct Size: Codable, Sendable {
-        public let longestEdge: Int
-        enum CodingKeys: String, CodingKey {
-            case longestEdge = "longest_edge"
-        }
+@Codable
+public struct Idefics3ProcessorConfiguration: Sendable {
+
+    @Codable
+    public struct Size: Sendable {
+        @CodingKey("longest_edge") public var longestEdge: Int
     }
 
-    public let imageMean: [CGFloat]
-    public let imageStd: [CGFloat]
-    public let size: Size
-    public let imageSequenceLength: Int?
+    @CodingKey("image_mean") public var imageMean: [CGFloat]
+    @CodingKey("image_std") public var imageStd: [CGFloat]
+    public var size: Size
+    @CodingKey("image_seq_len") public var imageSequenceLength: Int?
public var imageMeanTuple: (CGFloat, CGFloat, CGFloat) { (imageMean[0], imageMean[1], imageMean[2]) @@ -791,13 +731,6 @@ public struct Idefics3ProcessorConfiguration: Codable, Sendable { public var imageStdTuple: (CGFloat, CGFloat, CGFloat) { (imageStd[0], imageStd[1], imageStd[2]) } - - enum CodingKeys: String, CodingKey { - case imageMean = "image_mean" - case imageStd = "image_std" - case size - case imageSequenceLength = "image_seq_len" - } } // MARK: - Processor diff --git a/Libraries/MLXVLM/Models/Paligemma.swift b/Libraries/MLXVLM/Models/Paligemma.swift index 3cd3c5de..89e551f5 100644 --- a/Libraries/MLXVLM/Models/Paligemma.swift +++ b/Libraries/MLXVLM/Models/Paligemma.swift @@ -8,6 +8,7 @@ import Hub import MLX import MLXLMCommon import MLXNN +import ReerCodable import Tokenizers // MARK: - Language @@ -618,100 +619,63 @@ public class PaliGemma: Module, VLMModel, KVCacheDimensionProvider { // MARK: - Configuration /// Confguration for ``PaliGemma`` -public struct PaliGemmaConfiguration: Codable, Sendable { - - public struct TextConfiguration: Codable, Sendable { - public let modelType: String - public let hiddenSize: Int - public let hiddenLayers: Int - public let intermediateSize: Int - public let attentionHeads: Int - public let kvHeads: Int - public let vocabularySize: Int - private let _rmsNormEps: Float? - public var rmsNormEps: Float { _rmsNormEps ?? 1e-6 } - private let _ropeTheta: Float? - public var ropeTheta: Float { _ropeTheta ?? 10_000 } - private let _ropeTraditional: Bool? - public var ropeTraditional: Bool { _ropeTraditional ?? false } - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case kvHeads = "num_key_value_heads" - case vocabularySize = "vocab_size" - case _rmsNormEps = "rms_norm_eps" - case _ropeTheta = "rope_theta" - case _ropeTraditional = "rope_traditional" - } - } - - public struct VisionConfiguration: Codable, Sendable { - public let modelType: String - public let hiddenSize: Int - public let hiddenLayers: Int - public let intermediateSize: Int - public let attentionHeads: Int - public let patchSize: Int - public let projectionDimensions: Int - public let imageSize: Int - private let _channels: Int? - public var channels: Int { _channels ?? 3 } - private let _layerNormEps: Float? - public var layerNormEps: Float { _layerNormEps ?? 
1e-6 } - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case patchSize = "patch_size" - case projectionDimensions = "projection_dim" - case imageSize = "image_size" - case _channels = "num_channels" - case _layerNormEps = "layer_norm_eps" - } - } - - public let textConfiguration: TextConfiguration - public let visionConfiguration: VisionConfiguration - public let modelType: String - public let vocabularySize: Int - public let ignoreIndex: Int - public let imageTokenIndex: Int - public let hiddenSize: Int - public let padTokenId: Int - - enum CodingKeys: String, CodingKey { - case textConfiguration = "text_config" - case visionConfiguration = "vision_config" - case modelType = "model_type" - case vocabularySize = "vocab_size" - case ignoreIndex = "ignore_index" - case imageTokenIndex = "image_token_index" - case hiddenSize = "hidden_size" - case padTokenId = "pad_token_id" - } +@Codable +public struct PaliGemmaConfiguration: Sendable { + + @Codable + public struct TextConfiguration: Sendable { + @CodingKey("model_type") public var modelType: String + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float = 1e-6 + @CodingKey("rope_theta") public var ropeTheta: Float = 10_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + } + + @Codable + public struct VisionConfiguration: Sendable { + @CodingKey("model_type") public var modelType: String + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("patch_size") public var patchSize: Int + @CodingKey("projection_dim") public var projectionDimensions: Int + @CodingKey("image_size") public var imageSize: Int + @CodingKey("num_channels") public var channels: Int = 3 + @CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-6 + } + + @CodingKey("text_config") public var textConfiguration: TextConfiguration + @CodingKey("vision_config") public var visionConfiguration: VisionConfiguration + @CodingKey("model_type") public var modelType: String + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("ignore_index") public var ignoreIndex: Int + @CodingKey("image_token_index") public var imageTokenIndex: Int + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("pad_token_id") public var padTokenId: Int } /// Configuration for ``PaliGemmaProcessor`` -public struct PaliGemmaProcessorConfiguration: Codable, Sendable { +@Codable +public struct PaliGemmaProcessorConfiguration: Sendable { - public struct Size: Codable, Sendable { - public let width: Int - public let height: Int + @Codable + public struct Size: Sendable { + public var width: Int + public var height: Int var cgSize: CGSize { .init(width: width, height: height) } } - public let imageMean: [CGFloat] - public let imageStd: [CGFloat] - public let size: Size - public let 
imageSequenceLength: Int + @CodingKey("image_mean") public var imageMean: [CGFloat] + @CodingKey("image_std") public var imageStd: [CGFloat] + public var size: Size + @CodingKey("image_seq_length") public var imageSequenceLength: Int public var imageMeanTuple: (CGFloat, CGFloat, CGFloat) { (imageMean[0], imageMean[1], imageMean[2]) @@ -719,11 +683,4 @@ public struct PaliGemmaProcessorConfiguration: Codable, Sendable { public var imageStdTuple: (CGFloat, CGFloat, CGFloat) { (imageStd[0], imageStd[1], imageStd[2]) } - - enum CodingKeys: String, CodingKey { - case imageMean = "image_mean" - case imageStd = "image_std" - case size - case imageSequenceLength = "image_seq_length" - } } diff --git a/Libraries/MLXVLM/Models/Qwen25VL.swift b/Libraries/MLXVLM/Models/Qwen25VL.swift index cfd346e4..33cbbc6d 100644 --- a/Libraries/MLXVLM/Models/Qwen25VL.swift +++ b/Libraries/MLXVLM/Models/Qwen25VL.swift @@ -6,6 +6,7 @@ import Hub import MLX import MLXLMCommon import MLXNN +import ReerCodable import Tokenizers // MARK: - Language @@ -904,125 +905,62 @@ public class Qwen25VL: Module, VLMModel, KVCacheDimensionProvider { /// Configuration for ``Qwen25VL`` public struct Qwen25VLConfiguration: Codable, Sendable { - public struct TextConfiguration: Codable, Sendable { - public let modelType: String - public let hiddenSize: Int - public let hiddenLayers: Int - public let intermediateSize: Int - public let attentionHeads: Int - private let _rmsNormEps: Float? - public var rmsNormEps: Float { _rmsNormEps ?? 1e-6 } - public let vocabularySize: Int - public let kvHeads: Int - private let _maxPositionEmbeddings: Int? - public var maxPositionEmbeddings: Int { _maxPositionEmbeddings ?? 128000 } - private let _ropeTheta: Float? - public var ropeTheta: Float { _ropeTheta ?? 1_000_000 } - private let _ropeTraditional: Bool? - public var ropeTraditional: Bool { _ropeTraditional ?? false } - public let ropeScaling: [String: StringOrNumber]? - private let _tieWordEmbeddings: Bool? - public var tieWordEmbeddings: Bool { _tieWordEmbeddings ?? true } - private let _slidingWindow: Int? - public var slidingWindow: Int { _slidingWindow ?? 32768 } - private let _useSlidingWindow: Bool? - public var useSlidingWindow: Bool { _useSlidingWindow ?? 
false } - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case _rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case _maxPositionEmbeddings = "max_position_embeddings" - case _ropeTheta = "rope_theta" - case _ropeTraditional = "rope_traditional" - case ropeScaling = "rope_scaling" - case _tieWordEmbeddings = "tie_word_embeddings" - case _slidingWindow = "sliding_window" - case _useSlidingWindow = "use_sliding_window" - } + @Codable + public struct TextConfiguration: Sendable { + @CodingKey("model_type") public var modelType: String + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float = 1e-6 + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("max_position_embeddings") public var maxPositionEmbeddings: Int = 128000 + @CodingKey("rope_theta") public var ropeTheta: Float = 1_000_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? + @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = true + @CodingKey("sliding_window") public var slidingWindow: Int = 32768 + @CodingKey("use_sliding_window") public var useSlidingWindow: Bool = false } - public struct VisionConfiguration: Codable, Sendable { - public let depth: Int - public let hiddenSize: Int - public let intermediateSize: Int - public let outHiddenSize: Int - public let numHeads: Int - public let patchSize: Int - private let _inChans: Int? - public var inChannels: Int { _inChans ?? 3 } - private let _layerNormEps: Float? - public var layerNormEps: Float { _layerNormEps ?? 1e-6 } - public let spatialPatchSize: Int - public let spatialMergeSize: Int - public let temporalPatchSize: Int - public let windowSize: Int - public let fullattBlockIndexes: [Int] - public let tokensPerSecond: Int - private let _skipVision: Bool? - public var skipVision: Bool { _skipVision ?? false } - private let _hiddenAct: String? - public var hiddenAct: String { _hiddenAct ?? 
"silu" } - - enum CodingKeys: String, CodingKey { - case depth - case hiddenSize = "hidden_size" - case intermediateSize = "intermediate_size" - case outHiddenSize = "out_hidden_size" - case numHeads = "num_heads" - case patchSize = "patch_size" - case _inChans = "in_chans" - case _layerNormEps = "layer_norm_eps" // Added this line - case spatialPatchSize = "spatial_patch_size" - case spatialMergeSize = "spatial_merge_size" - case temporalPatchSize = "temporal_patch_size" - case windowSize = "window_size" - case fullattBlockIndexes = "fullatt_block_indexes" - case tokensPerSecond = "tokens_per_second" - case _skipVision = "skip_vision" - case _hiddenAct = "hidden_act" - } + @Codable + public struct VisionConfiguration: Sendable { + public var depth: Int + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("out_hidden_size") public var outHiddenSize: Int + @CodingKey("num_heads") public var numHeads: Int + @CodingKey("patch_size") public var patchSize: Int + @CodingKey("in_chans") public var inChannels: Int = 3 + @CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-6 + @CodingKey("spatial_patch_size") public var spatialPatchSize: Int + @CodingKey("spatial_merge_size") public var spatialMergeSize: Int + @CodingKey("temporal_patch_size") public var temporalPatchSize: Int + @CodingKey("window_size") public var windowSize: Int + @CodingKey("fullatt_block_indexes") public var fullattBlockIndexes: [Int] + @CodingKey("tokens_per_second") public var tokensPerSecond: Int + @CodingKey("skip_vision") public var skipVision: Bool = false + @CodingKey("hidden_act") public var hiddenAct: String = "silu" } + @Codable public struct BaseConfiguration: Codable, Sendable { - public let modelType: String - public let vocabularySize: Int - public let imageTokenId: Int - public let videoTokenId: Int - public let visionStartTokenId: Int - public let visionEndTokenId: Int - public let visionTokenId: Int - public let hiddenSize: Int - public let numAttentionHeads: Int - public let numHiddenLayers: Int - public let intermediateSize: Int - public let numKeyValueHeads: Int - public let slidingWindow: Int - public let useSlidingWindow: Bool - public let maxWindowLayers: Int - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case vocabularySize = "vocab_size" - case imageTokenId = "image_token_id" - case videoTokenId = "video_token_id" - case visionStartTokenId = "vision_start_token_id" - case visionEndTokenId = "vision_end_token_id" - case visionTokenId = "vision_token_id" - case hiddenSize = "hidden_size" - case numAttentionHeads = "num_attention_heads" - case numHiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case numKeyValueHeads = "num_key_value_heads" - case slidingWindow = "sliding_window" - case useSlidingWindow = "use_sliding_window" - case maxWindowLayers = "max_window_layers" - } + @CodingKey("model_type") public var modelType: String + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("image_token_id") public var imageTokenId: Int + @CodingKey("video_token_id") public var videoTokenId: Int + @CodingKey("vision_start_token_id") public var visionStartTokenId: Int + @CodingKey("vision_end_token_id") public var visionEndTokenId: Int + @CodingKey("vision_token_id") public var visionTokenId: Int + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_attention_heads") public var numAttentionHeads: Int + 
@CodingKey("num_hidden_layers") public var numHiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_key_value_heads") public var numKeyValueHeads: Int + @CodingKey("sliding_window") public var slidingWindow: Int + @CodingKey("use_sliding_window") public var useSlidingWindow: Bool + @CodingKey("max_window_layers") public var maxWindowLayers: Int } public let textConfiguration: TextConfiguration @@ -1044,6 +982,14 @@ public struct Qwen25VLConfiguration: Codable, Sendable { self.textConfiguration = try TextConfiguration(from: decoder) self.baseConfiguration = try BaseConfiguration(from: decoder) } + + public func encode(to encoder: any Encoder) throws { + var container = try encoder.container(keyedBy: CodingKeys.self) + + try container.encode(visionConfiguration, forKey: .visionConfiguration) + try textConfiguration.encode(to: encoder) + try baseConfiguration.encode(to: encoder) + } } /// Configuration for ``Qwen25VLProcessor`` diff --git a/Libraries/MLXVLM/Models/Qwen2VL.swift b/Libraries/MLXVLM/Models/Qwen2VL.swift index 8d735054..f5fae762 100644 --- a/Libraries/MLXVLM/Models/Qwen2VL.swift +++ b/Libraries/MLXVLM/Models/Qwen2VL.swift @@ -8,6 +8,7 @@ import Hub import MLX import MLXLMCommon import MLXNN +import ReerCodable import Tokenizers // MARK: - Language @@ -746,87 +747,45 @@ public class Qwen2VL: Module, VLMModel, KVCacheDimensionProvider { /// Configuration for ``Qwen2VL`` public struct Qwen2VLConfiguration: Codable, Sendable { - public struct TextConfiguration: Codable, Sendable { - public let modelType: String - public let hiddenSize: Int - public let hiddenLayers: Int - public let intermediateSize: Int - public let attentionHeads: Int - private let _rmsNormEps: Float? - public var rmsNormEps: Float { _rmsNormEps ?? 1e-6 } - public let vocabularySize: Int - public let kvHeads: Int - private let _maxPositionEmbeddings: Int? - public var maxpPositionEmbeddings: Int { _maxPositionEmbeddings ?? 32768 } - private let _ropeTheta: Float? - public var ropeTheta: Float { _ropeTheta ?? 1_000_000 } - private let _ropeTraditional: Bool? - public var ropeTraditional: Bool { _ropeTraditional ?? false } - public let ropeScaling: [String: StringOrNumber]? - private let _tieWordEmbeddings: Bool? - public var tieWordEmbeddings: Bool { _tieWordEmbeddings ?? true } - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case hiddenSize = "hidden_size" - case hiddenLayers = "num_hidden_layers" - case intermediateSize = "intermediate_size" - case attentionHeads = "num_attention_heads" - case _rmsNormEps = "rms_norm_eps" - case vocabularySize = "vocab_size" - case kvHeads = "num_key_value_heads" - case _maxPositionEmbeddings = "max_position_embeddings" - case _ropeTheta = "rope_theta" - case _ropeTraditional = "rope_traditional" - case ropeScaling = "rope_scaling" - case _tieWordEmbeddings = "tie_word_embeddings" - } - } - - public struct VisionConfiguration: Codable, Sendable { - public let depth: Int - public let embedDimensions: Int - public let hiddenSize: Int - public let numHeads: Int - public let patchSize: Int - public let mlpRatio: Float - public let _inChannels: Int? - public var inChannels: Int { _inChannels ?? 3 } - public let _layerNormEps: Float? - public var layerNormEps: Float { _layerNormEps ?? 
1e-6 } - public let spatialPatchSize: Int - public let spatialMergeSize: Int - public let temporalPatchSize: Int - - enum CodingKeys: String, CodingKey { - case depth - case embedDimensions = "embed_dim" - case hiddenSize = "hidden_size" - case numHeads = "num_heads" - case patchSize = "patch_size" - case mlpRatio = "mlp_ratio" - case _inChannels = "in_channels" - case _layerNormEps = "layer_norm_eps" - case spatialPatchSize = "spatial_patch_size" - case spatialMergeSize = "spatial_merge_size" - case temporalPatchSize = "temporal_patch_size" - } - } - - public struct BaseConfiguration: Codable, Sendable { - public let modelType: String - public let vocabularySize: Int - public let imageTokenId: Int - public let videoTokenId: Int - public let hiddenSize: Int - - enum CodingKeys: String, CodingKey { - case modelType = "model_type" - case vocabularySize = "vocab_size" - case imageTokenId = "image_token_id" - case videoTokenId = "video_token_id" - case hiddenSize = "hidden_size" - } + @Codable + public struct TextConfiguration: Sendable { + @CodingKey("model_type") public var modelType: String + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_hidden_layers") public var hiddenLayers: Int + @CodingKey("intermediate_size") public var intermediateSize: Int + @CodingKey("num_attention_heads") public var attentionHeads: Int + @CodingKey("rms_norm_eps") public var rmsNormEps: Float = 1e-6 + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("num_key_value_heads") public var kvHeads: Int + @CodingKey("max_position_embeddings") public var maxpPositionEmbeddings: Int = 32768 + @CodingKey("rope_theta") public var ropeTheta: Float = 1_000_000 + @CodingKey("rope_traditional") public var ropeTraditional: Bool = false + @CodingKey("rope_scaling") public var ropeScaling: [String: StringOrNumber]? 
+ @CodingKey("tie_word_embeddings") public var tieWordEmbeddings: Bool = true + } + + @Codable + public struct VisionConfiguration: Sendable { + public var depth: Int + @CodingKey("embed_dim") public var embedDimensions: Int + @CodingKey("hidden_size") public var hiddenSize: Int + @CodingKey("num_heads") public var numHeads: Int + @CodingKey("patch_size") public var patchSize: Int + @CodingKey("mlp_ratio") public var mlpRatio: Float + @CodingKey("in_channels") public var inChannels: Int = 3 + @CodingKey("layer_norm_eps") public var layerNormEps: Float = 1e-6 + @CodingKey("spatial_patch_size") public var spatialPatchSize: Int + @CodingKey("spatial_merge_size") public var spatialMergeSize: Int + @CodingKey("temporal_patch_size") public var temporalPatchSize: Int + } + + @Codable + public struct BaseConfiguration: Sendable { + @CodingKey("model_type") public var modelType: String + @CodingKey("vocab_size") public var vocabularySize: Int + @CodingKey("image_token_id") public var imageTokenId: Int + @CodingKey("video_token_id") public var videoTokenId: Int + @CodingKey("hidden_size") public var hiddenSize: Int } public let textConfiguration: TextConfiguration @@ -848,30 +807,34 @@ public struct Qwen2VLConfiguration: Codable, Sendable { self.textConfiguration = try TextConfiguration(from: decoder) self.baseConfiguration = try BaseConfiguration(from: decoder) } + + public func encode(to encoder: any Encoder) throws { + var container = try encoder.container(keyedBy: CodingKeys.self) + + try container.encode(visionConfiguration, forKey: .visionConfiguration) + try textConfiguration.encode(to: encoder) + try baseConfiguration.encode(to: encoder) + } } /// Configuration for ``Qwen2VLProcessor`` -public struct Qwen2VLProcessorConfiguration: Codable, Sendable { +@Codable +public struct Qwen2VLProcessorConfiguration: Sendable { - public struct Size: Codable, Sendable { - public let maxPixels: Int - public let minPixels: Int - - enum CodingKeys: String, CodingKey { - case maxPixels = "max_pixels" - case minPixels = "min_pixels" - } + @Codable + public struct Size: Sendable { + @CodingKey("max_pixels") public var maxPixels: Int + @CodingKey("min_pixels") public var minPixels: Int } - public let imageMean: [CGFloat] - public let imageStd: [CGFloat] - public let mergeSize: Int - public let patchSize: Int - public let temporalPatchSize: Int - - private let _size: Size? - private let _maxPixels: Int? - private let _minPixels: Int? + @CodingKey("image_mean") public var imageMean: [CGFloat] + @CodingKey("image_std") public var imageStd: [CGFloat] + @CodingKey("merge_size") public var mergeSize: Int + @CodingKey("patch_size") public var patchSize: Int + @CodingKey("temporal_patch_size") public var temporalPatchSize: Int + @CodingKey("max_pixels") private var _maxPixels: Int? + @CodingKey("min_pixels") private var _minPixels: Int? + @CodingKey("size") private var _size: Size? public var minPixels: Int { _minPixels ?? _size?.minPixels ?? 
3136 @@ -886,17 +849,6 @@ public struct Qwen2VLProcessorConfiguration: Codable, Sendable { public var imageStdTuple: (CGFloat, CGFloat, CGFloat) { (imageStd[0], imageStd[1], imageStd[2]) } - - enum CodingKeys: String, CodingKey { - case imageMean = "image_mean" - case imageStd = "image_std" - case mergeSize = "merge_size" - case patchSize = "patch_size" - case temporalPatchSize = "temporal_patch_size" - case _maxPixels = "max_pixels" - case _minPixels = "min_pixels" - case _size = "size" - } } /// Message Generator for Qwen2VL diff --git a/Libraries/MLXVLM/Models/SmolVLM2.swift b/Libraries/MLXVLM/Models/SmolVLM2.swift index b75a9717..b6756038 100644 --- a/Libraries/MLXVLM/Models/SmolVLM2.swift +++ b/Libraries/MLXVLM/Models/SmolVLM2.swift @@ -10,6 +10,7 @@ import CoreMedia import Foundation import MLX import MLXLMCommon +import ReerCodable import Tokenizers // MARK: - Configuration and modeling are Idefics3 @@ -18,47 +19,28 @@ typealias SmolVLM2Configuration = Idefics3Configuration typealias SmolVLM2 = Idefics3 // MARK: - SmolVLMProcessor and configuration +@Codable +public struct SmolVLMProcessorConfiguration: Sendable { -public struct SmolVLMProcessorConfiguration: Codable, Sendable { - public struct Size: Codable, Sendable { - public let longestEdge: Int - enum CodingKeys: String, CodingKey { - case longestEdge = "longest_edge" - } + @Codable + public struct Size: Sendable { + @CodingKey("longest_edge") public var longestEdge: Int } - public struct VideoSampling: Codable, Sendable { - public let fps: Int - public let maxFrames: Int + @Codable + public struct VideoSampling: Sendable { + public var fps: Int + @CodingKey("max_frames") public var maxFrames: Int // Intentionally ignoring videoSize because I believe it's still wrong in the config files // public let videoSize: Size - - enum CodingKeys: String, CodingKey { - case fps - case maxFrames = "max_frames" - } } - public let imageMean: [CGFloat] - public let imageStd: [CGFloat] - public let size: Size - public let maxImageSize: Size - public let videoSampling: VideoSampling - private let _imageSequenceLength: Int? - // TODO: this does not come in preprocessor_config.json, verify where transformers gets it from - public var imageSequenceLength: Int { _imageSequenceLength ?? 64 } - - init( - imageMean: [CGFloat], imageStd: [CGFloat], size: Size, maxImageSize: Size, - videoSampling: VideoSampling, imageSequenceLength: Int? 
- ) { - self.imageMean = imageMean - self.imageStd = imageStd - self.size = size - self.maxImageSize = maxImageSize - self.videoSampling = videoSampling - self._imageSequenceLength = imageSequenceLength - } + @CodingKey("image_mean") public var imageMean: [CGFloat] + @CodingKey("image_std") public var imageStd: [CGFloat] + public var size: Size + @CodingKey("max_image_size") public var maxImageSize: Size + @CodingKey("video_sampling") public var videoSampling: VideoSampling + @CodingKey("image_seq_len") public var imageSequenceLength: Int public var imageMeanTuple: (CGFloat, CGFloat, CGFloat) { (imageMean[0], imageMean[1], imageMean[2]) @@ -66,15 +48,6 @@ public struct SmolVLMProcessorConfiguration: Codable, Sendable { public var imageStdTuple: (CGFloat, CGFloat, CGFloat) { (imageStd[0], imageStd[1], imageStd[2]) } - - enum CodingKeys: String, CodingKey { - case imageMean = "image_mean" - case imageStd = "image_std" - case size - case maxImageSize = "max_image_size" - case videoSampling = "video_sampling" - case _imageSequenceLength = "image_seq_len" - } } public class SmolVLMProcessor: UserInputProcessor { diff --git a/Libraries/StableDiffusion/Configuration.swift b/Libraries/StableDiffusion/Configuration.swift index c39a06ff..e8afa346 100644 --- a/Libraries/StableDiffusion/Configuration.swift +++ b/Libraries/StableDiffusion/Configuration.swift @@ -7,7 +7,7 @@ import MLXNN // port of https://github.com/ml-explore/mlx-examples/blob/main/stable_diffusion/stable_diffusion/config.py /// Configuration for ``Autoencoder`` -struct AutoencoderConfiguration: Codable { +struct AutoencoderConfiguration: Codable, Sendable { public var inputChannels = 3 public var outputChannels = 3 @@ -60,7 +60,7 @@ struct AutoencoderConfiguration: Codable { } /// Configuration for ``CLIPTextModel`` -struct CLIPTextModelConfiguration: Codable { +struct CLIPTextModelConfiguration: Codable, Sendable { public enum ClipActivation: String, Codable { case fast = "quick_gelu" @@ -137,7 +137,7 @@ struct CLIPTextModelConfiguration: Codable { } /// Configuration for ``UNetModel`` -struct UNetConfiguration: Codable { +struct UNetConfiguration: Codable, Sendable { public var inputChannels = 4 public var outputChannels = 4 @@ -250,7 +250,7 @@ struct UNetConfiguration: Codable { } /// Configuration for ``StableDiffusion`` -public struct DiffusionConfiguration: Codable { +public struct DiffusionConfiguration: Codable, Sendable { public enum BetaSchedule: String, Codable { case linear = "linear" diff --git a/Package.swift b/Package.swift index 4233f481..1affcee1 100644 --- a/Package.swift +++ b/Package.swift @@ -31,6 +31,7 @@ let package = Package( .package( url: "https://github.com/huggingface/swift-transformers", .upToNextMinor(from: "0.1.21") ), + .package(url: "https://github.com/reers/ReerCodable.git", from: "1.2.3"), .package(url: "https://github.com/1024jp/GzipSwift", "6.0.1" ... 
"6.0.1"), // Only needed by MLXMNIST ], targets: [ @@ -44,6 +45,7 @@ let package = Package( .product(name: "MLXOptimizers", package: "mlx-swift"), .product(name: "MLXRandom", package: "mlx-swift"), .product(name: "Transformers", package: "swift-transformers"), + .product(name: "ReerCodable", package: "ReerCodable"), ], path: "Libraries/MLXLLM", exclude: [ @@ -63,6 +65,7 @@ let package = Package( .product(name: "MLXOptimizers", package: "mlx-swift"), .product(name: "MLXRandom", package: "mlx-swift"), .product(name: "Transformers", package: "swift-transformers"), + .product(name: "ReerCodable", package: "ReerCodable"), ], path: "Libraries/MLXVLM", exclude: [ @@ -81,6 +84,7 @@ let package = Package( .product(name: "MLXRandom", package: "mlx-swift"), .product(name: "MLXLinalg", package: "mlx-swift"), .product(name: "Transformers", package: "swift-transformers"), + .product(name: "ReerCodable", package: "ReerCodable"), ], path: "Libraries/MLXLMCommon", exclude: [ @@ -150,6 +154,7 @@ let package = Package( .product(name: "MLXNN", package: "mlx-swift"), .product(name: "MLXRandom", package: "mlx-swift"), .product(name: "Transformers", package: "swift-transformers"), + .product(name: "ReerCodable", package: "ReerCodable"), ], path: "Libraries/StableDiffusion", exclude: [ diff --git a/mlx-swift-examples.xcodeproj/project.pbxproj b/mlx-swift-examples.xcodeproj/project.pbxproj index df8f9ccd..2c0f3985 100644 --- a/mlx-swift-examples.xcodeproj/project.pbxproj +++ b/mlx-swift-examples.xcodeproj/project.pbxproj @@ -1042,6 +1042,7 @@ C397D8F22CD2F60B00B87EE2 /* XCLocalSwiftPackageReference "Libraries/.." */, C32A18442D00E13E0092A5B6 /* XCRemoteSwiftPackageReference "mlx-swift" */, C32B4C6B2DA7132C00EF663D /* XCRemoteSwiftPackageReference "swift-async-algorithms" */, + C3FF946F2DD54E170070900D /* XCRemoteSwiftPackageReference "ReerCodable" */, ); productRefGroup = C39273752B606A0A00368D5D /* Products */; projectDirPath = ""; @@ -3285,6 +3286,14 @@ minimumVersion = 1.4.0; }; }; + C3FF946F2DD54E170070900D /* XCRemoteSwiftPackageReference "ReerCodable" */ = { + isa = XCRemoteSwiftPackageReference; + repositoryURL = "https://github.com/reers/ReerCodable.git"; + requirement = { + kind = upToNextMajorVersion; + minimumVersion = 1.2.3; + }; + }; /* End XCRemoteSwiftPackageReference section */ /* Begin XCSwiftPackageProductDependency section */ diff --git a/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index ee2466cb..510a7fe6 100644 --- a/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -46,6 +46,15 @@ "version" : "0.4.0" } }, + { + "identity" : "reercodable", + "kind" : "remoteSourceControl", + "location" : "https://github.com/reers/ReerCodable.git", + "state" : { + "revision" : "a7b1eb0e93da899163f929e15608c325845e3430", + "version" : "1.2.3" + } + }, { "identity" : "swift-argument-parser", "kind" : "remoteSourceControl", @@ -100,6 +109,15 @@ "version" : "1.0.3" } }, + { + "identity" : "swift-syntax", + "kind" : "remoteSourceControl", + "location" : "https://github.com/swiftlang/swift-syntax.git", + "state" : { + "revision" : "f99ae8aa18f0cf0d53481901f88a0991dc3bd4a2", + "version" : "601.0.1" + } + }, { "identity" : "swift-transformers", "kind" : "remoteSourceControl",