// Copyright © 2025 Apple Inc.

import Foundation

/// Base ``LanguageModel`` configuration -- provides `modelType`
/// and `quantization` (used in loading the model).
///
/// This is used by ``ModelFactory/load(hub:configuration:progressHandler:)``
/// to determine the type of model to load.
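///
/// For example, a minimal sketch of decoding a model's `config.json`
/// (the local file path here is assumed for illustration):
///
/// ```swift
/// let data = try Data(contentsOf: URL(fileURLWithPath: "config.json"))
/// let config = try JSONDecoder().decode(BaseConfiguration.self, from: data)
/// print(config.modelType)
/// if let perLayer = config.perLayerQuantization {
///     // per-layer quantization values are available here
/// }
/// ```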
public struct BaseConfiguration: Codable, Sendable {
    public let modelType: String

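    /// Quantization parameters -- the `group_size` and `bits` values
    /// stored under the model configuration's `quantization` key.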
    public struct Quantization: Codable, Sendable, Equatable {
        public init(groupSize: Int, bits: Int) {
            self.groupSize = groupSize
            self.bits = bits
        }

        public let groupSize: Int
        public let bits: Int

        public var asTuple: (Int, Int) { (groupSize, bits) }

        enum CodingKeys: String, CodingKey {
            case groupSize = "group_size"
            case bits = "bits"
        }
    }

    /// Handling instructions for ``PerLayerQuantization`` -- either skip
    /// quantization for a layer or quantize it with the given parameters.
    public enum QuantizationOption: Sendable {
        case skip
        case quantize(Quantization)
    }

    /// Per-layer ``Quantization`` values with an optional default.
    public struct PerLayerQuantization: Sendable {
        public var quantization: Quantization? = nil
        public var perLayerQuantization: [String: QuantizationOption]

        public init(
            quantization: BaseConfiguration.Quantization? = nil,
            perLayerQuantization: [String: BaseConfiguration.QuantizationOption]
        ) {
            self.quantization = quantization
            self.perLayerQuantization = perLayerQuantization
        }

        /// The quantization to apply for the given layer name, or `nil` for no quantization.
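        ///
        /// A minimal sketch of the lookup behavior (the layer names below are hypothetical):
        ///
        /// ```swift
        /// let perLayer = BaseConfiguration.PerLayerQuantization(
        ///     quantization: BaseConfiguration.Quantization(groupSize: 64, bits: 4),
        ///     perLayerQuantization: ["lm_head": .skip]
        /// )
        /// perLayer.quantization(layer: "lm_head")            // nil -- explicitly skipped
        /// perLayer.quantization(layer: "model.embed_tokens") // falls back to the default
        /// ```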
        public func quantization(layer: String) -> Quantization? {
            if let perLayer = perLayerQuantization[layer] {
                switch perLayer {
                case .skip:
                    return nil
                case .quantize(let quantization):
                    return quantization
                }
            } else {
                return quantization
            }
        }
    }

    /// Special Codable to support a mixed `key: Int` / `key: Quantization` /
    /// `key: Bool` structure for heterogeneous quantization, e.g.
    ///
    /// ```
    /// "quantization": {
    ///     "group_size": 64,
    ///     "bits": 4,
    ///     "model.embed_tokens": {
    ///         "group_size": 32,
    ///         "bits": 4
    ///     },
    ///     "model.layers.0.self_attn.q_norm": false
    /// }
    /// ```
    ///
    /// This mixed type structure requires manual decoding.
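    ///
    /// The top-level `group_size` / `bits` keys become the default ``Quantization``,
    /// a nested object becomes a per-layer ``QuantizationOption/quantize(_:)``, and
    /// `false` marks the layer as ``QuantizationOption/skip``.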
    struct QuantizationContainer: Codable, Sendable {
        var quantization: Quantization
        var perLayerQuantization: PerLayerQuantization

        // based on Dictionary's coding key
        internal struct _DictionaryCodingKey: CodingKey {
            internal let stringValue: String
            internal let intValue: Int?

            internal init(stringValue: String) {
                self.stringValue = stringValue
                self.intValue = Int(stringValue)
            }

            internal init(intValue: Int) {
                self.stringValue = "\(intValue)"
                self.intValue = intValue
            }
        }

        init(from decoder: any Decoder) throws {
            // handle the embedded Quantization
            self.quantization = try Quantization(from: decoder)

            // and the interleaved per-layer values
            var perLayerQuantization = [String: QuantizationOption]()
            let container = try decoder.container(keyedBy: _DictionaryCodingKey.self)
            for key in container.allKeys {
                switch key.stringValue {
                case Quantization.CodingKeys.groupSize.rawValue: continue
                case Quantization.CodingKeys.bits.rawValue: continue

                default:
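                    // a boolean `false` disables quantization for this layer;
                    // `true` adds no entry, so the default quantization applies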
                    if let f = try? container.decode(Bool.self, forKey: key) {
                        if !f {
                            perLayerQuantization[key.stringValue] = .skip
                        }
                    } else {
                        perLayerQuantization[key.stringValue] = .quantize(
                            try container.decode(Quantization.self, forKey: key))
                    }
                }
            }
            self.perLayerQuantization = PerLayerQuantization(
                quantization: quantization, perLayerQuantization: perLayerQuantization)
        }

        func encode(to encoder: any Encoder) throws {
            try quantization.encode(to: encoder)

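            // write the per-layer overrides interleaved with the default
            // `group_size` / `bits` keys encoded above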
            var container = encoder.container(keyedBy: _DictionaryCodingKey.self)
            for (key, value) in perLayerQuantization.perLayerQuantization {
                switch value {
                case .skip:
                    try container.encode(false, forKey: .init(stringValue: key))
                case .quantize(let q):
                    try container.encode(q, forKey: .init(stringValue: key))
                }
            }
        }
    }

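    /// Decoded contents of the `quantization` key, if present.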
    var quantizationContainer: QuantizationContainer?

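    /// The default (model-wide) quantization, if any.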
    @available(*, deprecated, message: "Please use perLayerQuantization instead")
    public var quantization: Quantization? {
        quantizationContainer?.quantization
    }

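    /// Per-layer quantization values, if the model is quantized.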
    public var perLayerQuantization: PerLayerQuantization? {
        quantizationContainer?.perLayerQuantization
    }

    enum CodingKeys: String, CodingKey {
        case modelType = "model_type"
        case quantizationContainer = "quantization"
    }
}