Commit 43d2e98

Improve adapters usage (LoRA, DoRA) (#326)
* Improve adapters usage
* Use quantized matmul
* Improve adapters factory
1 parent f7da396 commit 43d2e98
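
At a high level, this commit replaces the old `LoRAConvertToLinear` protocol and its `toLinear(deQuantize:)` method with a common `LoRALayer` protocol whose `fused()` method folds adapter weights back into the base layer, adds DoRA layers (`DoRALinear` / `QDoRALinear`) alongside the existing LoRA ones, threads a `scale` parameter through the factory methods, and computes the quantized base projection with `quantizedMatmul`. The snippet below is a minimal usage sketch, not part of the commit: only `DoRALinear.from(linear:rank:scale:)` and `fused()` come from this diff; the import target, the 512-unit `Linear`, and the training step are hypothetical placeholders.

import MLX
import MLXLMCommon   // assumed target for the adapter layers; adjust to the actual module
import MLXNN

// Wrap a base projection in a DoRA adapter (the factory falls back to
// QDoRALinear when given a QuantizedLinear, per the code in this commit).
let base = Linear(512, 512)
let adapter = DoRALinear.from(linear: base, rank: 8, scale: 20.0)

// ... train only the adapter parameters ("lora_a", "lora_b", "m"); the base
// weight and bias are frozen in the adapter's init ...

// Fold the trained adapter back into a plain layer for inference.
let fusedLayer = adapter.fused()   // Linear (or QuantizedLinear for the quantized variant)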

File tree

9 files changed, +762 -61 lines changed

Libraries/MLXLLM/LoraTrain.swift

Lines changed: 2 additions & 2 deletions
@@ -206,8 +206,8 @@ public enum LoRATrain {
            let children = layer.children()
            for key in keys {
                if let item = children[key], case .value(let child) = item {
-                   if let lora = child as? LoRAConvertToLinear {
-                       update[key] = .value(lora.toLinear(deQuantize: deQuantize))
+                   if let lora = child as? LoRALayer {
+                       update[key] = .value(lora.fused())
                    }
                }
            }
Lines changed: 182 additions & 0 deletions
@@ -0,0 +1,182 @@
//
//  DoRA+Layers.swift
//  mlx-libraries
//
//  Created by Ivan Petrukha on 02.06.2025.
//

import Foundation
import MLX
import MLXLinalg
import MLXNN
import MLXRandom

/// Performs the forward pass for a DoRA linear layer.
private func forward(
    x: MLXArray, y: MLXArray,
    weight: MLXArray, bias: MLXArray?,
    loraA: MLXArray, loraB: MLXArray,
    scale: Float, magnitude: MLXArray
) -> MLXArray {
    let z = matmul(matmul(x, loraA), loraB)
    var out = y + (scale * z).asType(x.dtype)

    let adapted = weight + matmul(scale * loraB.T, loraA.T)
    let denom = norm(adapted, axis: 1)
    out *= (magnitude / denom).asType(x.dtype)

    return if let bias {
        out + bias
    } else {
        out
    }
}

/// Fuses the base weights with the DoRA parameters.
private func fuse(
    weight: MLXArray,
    loraA: MLXArray, loraB: MLXArray,
    scale: Float, magnitude: MLXArray
) -> MLXArray {
    let loraA = loraA.T.asType(weight.dtype)
    let loraB = (scale * loraB.T).asType(weight.dtype)

    var adapted = weight + matmul(loraB, loraA)
    let denom = norm(adapted, axis: 1)
    adapted *= (magnitude / denom).reshaped([-1, 1])

    return adapted
}

/// Filters out DoRA-specific parameters from a list of module keys.
private func filterFreezeKeys(from module: Module, keys: [String]?) -> [String] {
    return
        (keys
        ?? module.filterMap(filter: type(of: module).filterLocalParameters)
            .flattened()
            .map { $0.0 })
        .filter { !["lora_a", "lora_b", "m"].contains($0) }
}

/// Implementation of DoRA `Linear` replacement layer.
///
/// This layer implements DoRA (Weight-Decomposed Low-Rank Adaptation) for `Linear` layers.
///
/// ``QDoRALinear`` is the equivalent class for `QuantizedLinear`.
public class DoRALinear: Linear, LoRALayer {

    let scale: Float

    @ParameterInfo(key: "lora_a") var loraA: MLXArray
    @ParameterInfo(key: "lora_b") var loraB: MLXArray
    @ParameterInfo(key: "m") var magnitude: MLXArray

    required public init(linear: Linear, rank: Int = 8, scale: Float = 20.0) {
        let (outputDimensions, inputDimensions) = linear.shape
        let loraScale = 1 / sqrt(Float(inputDimensions))

        self.scale = scale
        self._loraA.wrappedValue = MLXRandom.uniform(
            low: -loraScale, high: loraScale, [inputDimensions, rank])
        self._loraB.wrappedValue = MLXArray.zeros([rank, outputDimensions])
        self._magnitude.wrappedValue = MLXLinalg.norm(linear.weight, axis: 1)

        super.init(weight: linear.weight, bias: linear.bias)

        freeze()
    }

    public static func from(linear: Linear, rank: Int = 8, scale: Float = 20.0) -> LoRALayer {
        if let linear = linear as? QuantizedLinear {
            QDoRALinear(linear: linear, rank: rank, scale: scale)
        } else {
            DoRALinear(linear: linear, rank: rank, scale: scale)
        }
    }

    public override func freeze(recursive: Bool = true, keys: [String]? = nil, strict: Bool = false)
        throws
    {
        let keys = filterFreezeKeys(from: self, keys: keys)
        try super.freeze(recursive: recursive, keys: keys, strict: strict)
    }

    public func fused() -> Module {
        Linear(
            weight: fuse(
                weight: weight, loraA: loraA, loraB: loraB, scale: scale, magnitude: magnitude),
            bias: bias
        )
    }

    public override func callAsFunction(_ x: MLXArray) -> MLXArray {
        let y = matmul(x, weight.T)
        return forward(
            x: x, y: y,
            weight: weight, bias: bias,
            loraA: loraA, loraB: loraB,
            scale: scale, magnitude: magnitude
        )
    }
}

/// Implementation of DoRA `QuantizedLinear` replacement layer.
///
/// See ``DoRALinear`` (equivalent class for `Linear` layers) for more information.
///
/// ### See Also
/// - ``DoRALinear``
public class QDoRALinear: QuantizedLinear, LoRALayer {

    let scale: Float

    @ParameterInfo(key: "lora_a") var loraA: MLXArray
    @ParameterInfo(key: "lora_b") var loraB: MLXArray
    @ParameterInfo(key: "m") var magnitude: MLXArray

    required public init(linear: QuantizedLinear, rank: Int = 8, scale: Float = 20.0) {
        let (outputDimensions, inputDimensions) = linear.shape
        let loraScale = 1 / sqrt(Float(inputDimensions))

        self.scale = scale
        self._loraA.wrappedValue = MLXRandom.uniform(
            low: -loraScale, high: loraScale, [inputDimensions, rank])
        self._loraB.wrappedValue = MLXArray.zeros([rank, outputDimensions])
        self._magnitude.wrappedValue = MLXLinalg.norm(linear.dequantizedWeight, axis: 1)

        super.init(
            weight: linear.weight, bias: linear.bias,
            scales: linear.scales, biases: linear.biases,
            groupSize: linear.groupSize, bits: linear.bits
        )

        freeze()
    }

    public override func freeze(recursive: Bool = true, keys: [String]? = nil, strict: Bool = false)
        throws
    {
        let keys = filterFreezeKeys(from: self, keys: keys)
        try super.freeze(recursive: recursive, keys: keys, strict: strict)
    }

    public func fused() -> Module {
        QuantizedLinear(
            weight: fuse(
                weight: dequantizedWeight, loraA: loraA, loraB: loraB, scale: scale,
                magnitude: magnitude),
            bias: bias, groupSize: groupSize, bits: bits
        )
    }

    public override func callAsFunction(_ x: MLXArray) -> MLXArray {
        let y = quantizedMatmul(
            x, weight, scales: scales, biases: biases, groupSize: groupSize, bits: bits)
        return forward(
            x: x, y: y,
            weight: dequantizedWeight, bias: bias,
            loraA: loraA, loraB: loraB,
            scale: scale, magnitude: magnitude
        )
    }
}
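
In both new layers, forward scales the combined output (the base projection plus scale * x * A * B) by the learned magnitude m divided by the per-row norm of the adapted weight W + scale * B^T * A^T, and fuse bakes that same per-row rescaling into the weight itself, so a fused layer should reproduce the adapter's output. A small consistency-check sketch (assumed usage, not part of the commit; the import target is a placeholder):

import MLX
import MLXLMCommon   // assumed target for DoRALinear
import MLXNN
import MLXRandom

// With a freshly initialized adapter, lora_b is zero and m equals the row
// norms of the base weight, so the adapter and its fused() layer must agree;
// after training they should still match up to floating-point tolerance.
let base = Linear(64, 64)
let dora = DoRALinear(linear: base, rank: 8, scale: 20.0)

let x = MLXRandom.normal([4, 64])
let adapted = dora(x)                        // forward pass through the adapter
let fused = (dora.fused() as! Linear)(x)     // forward pass through the fused Linear

print(allClose(adapted, fused, atol: 1e-5))  // expected: true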

Libraries/MLXLMCommon/Lora.swift renamed to Libraries/MLXLMCommon/Adapters/LoRA/LoRA+Layers.swift

Lines changed: 20 additions & 59 deletions
@@ -4,49 +4,7 @@ import Foundation
 import MLX
 import MLXNN
 import MLXOptimizers
-import Tokenizers
-
-/// Layers to apply LoRA adapters to.
-///
-/// This is the value returned by ``LoRAModel/loraLinearLayers()``.
-public typealias LoRALinearLayers = [(Module, [String])]
-
-public protocol LoRAModel {
-    /// Return the layers and keys to apply LoRA adapters to.
-    ///
-    /// For example this might apply the adapters to the `q` an `v` projections in the
-    /// Attention layers:
-    ///
-    /// ```swift
-    /// model.layers.map { ($0.attention, ["q_proj", "v_proj"]) }
-    /// ```
-    ///
-    /// It is not required that a model implement this protocol to have LoRA adapters applied, but
-    /// the command line driver example uses this to produce the ``LoRALinearLayers``.
-    ///
-    /// ### See Also
-    /// - ``LoRATrain/convert(model:layers:)``
-    func loraLinearLayers() -> LoRALinearLayers
-
-    /// Return a suffix of the layers and keys to apply LoRA adapters to.
-    ///
-    /// See ``loraLinearLayers()``
-    func loraLinearLayers(_ count: Int) -> LoRALinearLayers
-}
-
-extension LoRAModel {
-    public func loraLinearLayers(_ count: Int) -> LoRALinearLayers {
-        loraLinearLayers().suffix(count)
-    }
-}
-
-/// Protocol for LoRA implementations that provides a method for converting back to a `Linear`
-/// (or subtype).
-///
-/// This is normally called via ``LoRATrain/fuse(model:layers:deQuantize:)``
-public protocol LoRAConvertToLinear {
-    func toLinear(deQuantize: Bool) -> Linear
-}
+import MLXRandom
 
 /// Implementation of LoRA `Linear` replacement layer.
 ///
@@ -67,7 +25,7 @@ public protocol LoRAConvertToLinear {
 /// - ``QLoRALinear``
 /// - ``LoRATrain/convert(model:layers:)``
 /// - ``LoRATrain/fuse(model:layers:deQuantize:)``
-public class LoRALinear: Linear, LoRAConvertToLinear {
+public class LoRALinear: Linear, LoRALayer {
 
     let scale: Float
 
@@ -113,12 +71,13 @@ public class LoRALinear: Linear, LoRAConvertToLinear {
     /// ### See Also
     /// - ``LoRATrain/convert(model:layers:)``
     /// - ``QLoRALinear/from(linear:rank:)``
-    public static func from(linear: Linear, rank: Int = 8) -> Linear {
+    public static func from(linear: Linear, rank: Int = 8, scale: Float = 20.0) -> LoRALayer {
         if let linear = linear as? QuantizedLinear {
-            return QLoRALinear.from(linear: linear, rank: rank)
+            return QLoRALinear.from(linear: linear, rank: rank, scale: scale)
         }
         let (outputDimensions, inputDimensions) = linear.shape
-        return LoRALinear(inputDimensions, outputDimensions, rank: rank, linear: linear)
+        return LoRALinear(
+            inputDimensions, outputDimensions, rank: rank, scale: scale, linear: linear)
     }
 
     /// Convert back into a fused `Linear` layer.
@@ -129,7 +88,7 @@ public class LoRALinear: Linear, LoRAConvertToLinear {
     /// - ``LoRATrain/fuse(model:layers:deQuantize:)``
     /// - ``LoRAConvertToLinear``
     /// - ``QLoRALinear/toLinear(deQuantize:)``
-    public func toLinear(deQuantize: Bool = false) -> Linear {
+    public func fused() -> Module {
         let dtype = weight.dtype
         let loraB = (scale * loraB.T).asType(dtype)
         let loraA = loraA.T.asType(dtype)
@@ -146,7 +105,7 @@ public class LoRALinear: Linear, LoRAConvertToLinear {
 /// Implementation of LoRA `QuantizedLinear` replacement layer.
 ///
 /// See ``LoRALinear`` (equivalent class for `Linear` layers) for more information.
-public class QLoRALinear: QuantizedLinear, LoRAConvertToLinear {
+public class QLoRALinear: QuantizedLinear, LoRALayer {
 
     let scale: Float
 
@@ -196,9 +155,12 @@ public class QLoRALinear: QuantizedLinear, LoRAConvertToLinear {
     /// ### See Also
     /// - ``LoRATrain/convert(model:layers:)``
    /// - ``LoRALinear/from(linear:rank:)``
-    public static func from(linear: QuantizedLinear, rank: Int = 8) -> Linear {
+    public static func from(linear: QuantizedLinear, rank: Int = 8, scale: Float = 20.0)
+        -> LoRALayer
+    {
         let (outputDimensions, inputDimensions) = linear.shape
-        return QLoRALinear(inputDimensions, outputDimensions, rank: rank, linear: linear)
+        return QLoRALinear(
+            inputDimensions, outputDimensions, rank: rank, scale: scale, linear: linear)
     }
 
     /// Convert back into a fused `QuantizedLinear` layer.
@@ -207,17 +169,16 @@ public class QLoRALinear: QuantizedLinear, LoRAConvertToLinear {
     ///
     /// ### See Also
     /// - ``LoRATrain/fuse(model:layers:deQuantize:)``
-    public func toLinear(deQuantize: Bool = false) -> Linear {
-        // convert back into full weights
-        let weight = dequantized(
-            weight, scales: scales, biases: biases, groupSize: groupSize, bits: bits)
-
+    public func fused() -> Module {
+        let weight = dequantizedWeight
         let loraB = (scale * loraB.T).asType(.float16)
         let loraA = loraA.T.asType(.float16)
-
-        // convert back into quantized
         return QuantizedLinear(
-            weight: weight + matmul(loraB, loraA), bias: bias, groupSize: groupSize, bits: bits)
+            weight: weight + matmul(loraB, loraA),
+            bias: bias,
+            groupSize: groupSize,
+            bits: bits
+        )
     }
 
     public override func callAsFunction(_ x: MLXArray) -> MLXArray {
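
On the "Use quantized matmul" part of the commit message: in the quantized layers the base projection is now computed with `quantizedMatmul` on the packed weights (see `QDoRALinear.callAsFunction` above), while `dequantizedWeight` is reserved for the magnitude and fusion paths. The sketch below is an illustration rather than code from this commit; it shows two numerically equivalent ways of computing that projection using only calls that appear in the diff.

import MLX
import MLXNN
import MLXRandom

// Build a quantized layer from a plain Linear (this initializer is the one
// used by fused() in the diff above).
let base = Linear(128, 128)
let quantized = QuantizedLinear(
    weight: base.weight, bias: base.bias, groupSize: 64, bits: 4)
let x = MLXRandom.normal([2, 128])

// Dequantize first, then multiply...
let viaDequantize = matmul(
    x,
    dequantized(
        quantized.weight, scales: quantized.scales, biases: quantized.biases,
        groupSize: quantized.groupSize, bits: quantized.bits
    ).T)

// ...or multiply directly against the packed weights, as the new
// callAsFunction implementations do.
let viaQuantizedMatmul = quantizedMatmul(
    x, quantized.weight, scales: quantized.scales, biases: quantized.biases,
    groupSize: quantized.groupSize, bits: quantized.bits)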
