swift-format

jolonf · jolonf · commit 50ff67a87a75 · 2025-05-05T13:31:25.000+10:00
diff --git a/Applications/MLXChatExample/Models/PromptCache.swift b/Applications/MLXChatExample/Models/PromptCache.swift
@@ -44,30 +44,30 @@ public class PromptCache: @unchecked Sendable {
     ///         - If the cache is not trimmable return nil for the caller
     ///             to create a new cache.
     public func getUncachedSuffix(prompt: MLXArray) -> MLXArray? {
-        
+
         print("[getUncachedSuffix] self.tokens.size = \(self.tokens.size)")
-        
+
         print("cache[\(self.tokens.size)]: \(self.tokens)")
         print("prompt[\(prompt.size)]: \(prompt)")
-    
+
         let comPrefixLength = commonPrefixLength(newPromptTokens: prompt)
         print("[getUncachedSuffix] comPrefixLength: \(comPrefixLength)")
-        
+
         if comPrefixLength == self.tokens.size {
-            let suffix = prompt[comPrefixLength..<prompt.size]
+            let suffix = prompt[comPrefixLength ..< prompt.size]
             print("Concating...")
             self.tokens = concatenated([self.tokens, suffix], axis: 0)
             return suffix
-        }  else if (comPrefixLength < self.tokens.size) {
+        } else if comPrefixLength < self.tokens.size {
             if isTrimmable() {
                 print("trimming: \(self.tokens.size - comPrefixLength)")
                 let trimmedLen = self.trim(self.tokens.size - comPrefixLength)
                 print("trimmed: \(trimmedLen)")
                 if trimmedLen != self.tokens.size - comPrefixLength {
                     print("Warning: request trimmed amount and actual trimmed amount are different")
                 }
-                self.tokens = self.tokens[0..<comPrefixLength]
-                let suffix = prompt[comPrefixLength..<prompt.size]
+                self.tokens = self.tokens[0 ..< comPrefixLength]
+                let suffix = prompt[comPrefixLength ..< prompt.size]
                 self.tokens = concatenated([self.tokens, suffix], axis: 0)
                 return suffix
             } else {
@@ -81,15 +81,15 @@ public class PromptCache: @unchecked Sendable {
 
     /// - Returns: true if all KV caches are trimmable
     public func isTrimmable() -> Bool {
-        return cache.allSatisfy { $0.isTrimmable()}
+        return cache.allSatisfy { $0.isTrimmable() }
     }
 
     /// Trims all KV caches.
     /// - Parameters:
     ///   - n: Amount to trim.
     /// - Returns: Amount KV Caches were trimmed (may be less than ``n``).
     public func trim(_ n: Int) -> Int {
-        if !self.isTrimmable(){
+        if !self.isTrimmable() {
             return 0
         }
         return cache.map { $0.trim(n: n) }.max() ?? 0
@@ -103,7 +103,7 @@ public class PromptCache: @unchecked Sendable {
     public func commonPrefixLength(newPromptTokens: MLXArray) -> Int {
         return commonPrefixLength(self.tokens, newPromptTokens)
     }
-    
+
     /// Finds the common prefix between ``MLXArray``s.
     /// - Parameters:
     ///   - array1: First array
@@ -113,7 +113,7 @@ public class PromptCache: @unchecked Sendable {
         // TODO: Add test cases
         print("Calculating common prefix: array1[\(array1.size)] array2[\(array2.size)]")
         let minLength = min(array1.size, array2.size)
-        for i in 0..<minLength {
+        for i in 0 ..< minLength {
             if all(array1[i] .!= array2[i]).item(Bool.self) {
                 return i
             }
diff --git a/Applications/MLXChatExample/Services/MLXService.swift b/Applications/MLXChatExample/Services/MLXService.swift
@@ -19,6 +19,7 @@ class MLXService {
     /// Includes both language models (LLM) and vision-language models (VLM).
     static let availableModels: [LMModel] = [
         LMModel(name: "llama3.2:1b", configuration: LLMRegistry.llama3_2_1B_4bit, type: .llm),
+        LMModel(name: "llama3.2:3b", configuration: LLMRegistry.llama3_2_3B_4bit, type: .llm),
         LMModel(name: "qwen2.5:1.5b", configuration: LLMRegistry.qwen2_5_1_5b, type: .llm),
         LMModel(name: "smolLM:135m", configuration: LLMRegistry.smolLM_135M_4bit, type: .llm),
         LMModel(name: "qwen3:0.6b", configuration: LLMRegistry.qwen3_0_6b_4bit, type: .llm),
@@ -72,13 +73,13 @@ class MLXService {
                     self.modelDownloadProgress = progress
                 }
             }
-            
+
             // Clear out the promptCache
             promptCache.removeObject(forKey: model.name as NSString)
-            
+
             // Cache the loaded model for future use
             modelCache.setObject(container, forKey: model.name as NSString)
-            
+
             return container
         }
     }
@@ -127,15 +128,20 @@ class MLXService {
             // Get the prompt cache and adjust new prompt to remove
             // prefix already in cache, trim cache if cache is
             // inconsistent with new prompt.
-            let (cache, lmInput) = getPromptCache(fullPrompt: fullPrompt, parameters: parameters, context: context, modelName: model.name)
-            
+            let (cache, lmInput) = getPromptCache(
+                fullPrompt: fullPrompt, parameters: parameters, context: context,
+                modelName: model.name)
+
             // TODO: The generated tokens should be added to the prompt cache but not possible with AsyncStream
             return try MLXLMCommon.generate(
                 input: lmInput, parameters: parameters, context: context, cache: cache.cache)
         }
     }
-    
-    func getPromptCache(fullPrompt: LMInput, parameters: GenerateParameters, context: ModelContext, modelName: String) -> (PromptCache, LMInput) {
+
+    func getPromptCache(
+        fullPrompt: LMInput, parameters: GenerateParameters, context: ModelContext,
+        modelName: String
+    ) -> (PromptCache, LMInput) {
         let cache: PromptCache
         if let existingCache = promptCache.object(forKey: modelName as NSString) {
             cache = existingCache
@@ -146,7 +152,7 @@ class MLXService {
         }
 
         let lmInput: LMInput
-        
+
         /// Remove prefix from prompt that is already in cache
         if let suffix = cache.getUncachedSuffix(prompt: fullPrompt.text.tokens) {
             lmInput = LMInput(text: LMInput.Text(tokens: suffix))
@@ -157,7 +163,7 @@ class MLXService {
             self.promptCache.setObject(newCache, forKey: modelName as NSString)
             lmInput = fullPrompt
         }
-        
+
         return (cache, lmInput)
     }
 }
diff --git a/Libraries/MLXLMCommon/KVCache.swift b/Libraries/MLXLMCommon/KVCache.swift
@@ -12,9 +12,9 @@ public protocol KVCache: Evaluatable {
     var offset: Int { get }
 
     func update(keys: MLXArray, values: MLXArray) -> (MLXArray, MLXArray)
-    
+
     func isTrimmable() -> Bool
-    
+
     func trim(n: Int) -> Int
 }
 
@@ -100,11 +100,11 @@ public class KVCacheSimple: KVCache, Evaluatable {
             self.values![.ellipsis, ..<self.offset, 0...]
         )
     }
-    
+
     public func isTrimmable() -> Bool {
         return true
     }
-    
+
     public func trim(n: Int) -> Int {
         let toTrim = min(self.offset, n)
         self.offset -= toTrim

Original file line number	Diff line number	Diff line change
`@@ -12,9 +12,9 @@ public protocol KVCache: Evaluatable {`
`12`	`12`	`var offset: Int { get }`
`13`	`13`
`14`	`14`	`func update(keys: MLXArray, values: MLXArray) -> (MLXArray, MLXArray)`
`15`		`-`
	`15`	`+`
`16`	`16`	`func isTrimmable() -> Bool`
`17`		`-`
	`17`	`+`
`18`	`18`	`func trim(n: Int) -> Int`
`19`	`19`	`}`
`20`	`20`
`@@ -100,11 +100,11 @@ public class KVCacheSimple: KVCache, Evaluatable {`
`100`	`100`	`self.values![.ellipsis, ..<self.offset, 0...]`
`101`	`101`	`)`
`102`	`102`	`}`
`103`		`-`
	`103`	`+`
`104`	`104`	`public func isTrimmable() -> Bool {`
`105`	`105`	`return true`
`106`	`106`	`}`
`107`		`-`
	`107`	`+`
`108`	`108`	`public func trim(n: Int) -> Int {`
`109`	`109`	`let toTrim = min(self.offset, n)`
`110`	`110`	`self.offset -= toTrim`