Commit 983eaac

Support tool use and add example (#174)
* Update swift-transformers
* Support tool use, add example
1 parent 3864824 commit 983eaac

File tree

5 files changed: +96 −22 lines

- Applications/LLMEval/ContentView.swift
- Libraries/MLXLLM/LLMModelFactory.swift
- Libraries/MLXLMCommon/UserInput.swift
- Package.swift
- mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved

Applications/LLMEval/ContentView.swift

Lines changed: 40 additions & 8 deletions
@@ -10,10 +10,10 @@ import SwiftUI
 import Tokenizers
 
 struct ContentView: View {
+    @Environment(DeviceStat.self) private var deviceStat
 
-    @State var prompt = ""
     @State var llm = LLMEvaluator()
-    @Environment(DeviceStat.self) private var deviceStat
+    @State var prompt = "What's the current weather in Paris?"
 
     enum displayStyle: String, CaseIterable, Identifiable {
         case plain, markdown
@@ -34,6 +34,10 @@ struct ContentView: View {
                 Text(llm.stat)
             }
             HStack {
+                Toggle(isOn: $llm.includeWeatherTool) {
+                    Text("Include \"get current weather\" tool")
+                }
+                .frame(maxWidth: 350, alignment: .leading)
                 Spacer()
                 if llm.running {
                     ProgressView()
@@ -127,7 +131,6 @@ struct ContentView: View {
         }
         .task {
             self.prompt = llm.modelConfiguration.defaultPrompt
-
             // pre-load the weights on launch to speed up the first generation
             _ = try? await llm.load()
         }
@@ -154,13 +157,15 @@ class LLMEvaluator {
 
     var running = false
 
+    var includeWeatherTool = false
+
     var output = ""
     var modelInfo = ""
     var stat = ""
 
-    /// This controls which model loads. `phi3_5_4bit` is one of the smaller ones, so this will fit on
+    /// This controls which model loads. `qwen2_5_1_5b` is one of the smaller ones, so this will fit on
     /// more devices.
-    let modelConfiguration = ModelRegistry.phi3_5_4bit
+    let modelConfiguration = ModelRegistry.qwen2_5_1_5b
 
     /// parameters controlling the output
     let generateParameters = GenerateParameters(temperature: 0.6)
@@ -178,6 +183,29 @@ class LLMEvaluator {
 
     var loadState = LoadState.idle
 
+    let currentWeatherToolSpec: [String: any Sendable] =
+        [
+            "type": "function",
+            "function": [
+                "name": "get_current_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": [
+                    "type": "object",
+                    "properties": [
+                        "location": [
+                            "type": "string",
+                            "description": "The city and state, e.g. San Francisco, CA",
+                        ] as [String: String],
+                        "unit": [
+                            "type": "string",
+                            "enum": ["celsius", "fahrenheit"],
+                        ] as [String: any Sendable],
+                    ] as [String: [String: any Sendable]],
+                    "required": ["location"],
+                ] as [String: any Sendable],
+            ] as [String: any Sendable],
+        ] as [String: any Sendable]
+
     /// load and return the model -- can be called multiple times, subsequent calls will
     /// just return the loaded model
     func load() async throws -> ModelContainer {
@@ -222,18 +250,22 @@ class LLMEvaluator {
         MLXRandom.seed(UInt64(Date.timeIntervalSinceReferenceDate * 1000))
 
         let result = try await modelContainer.perform { context in
-            let input = try await context.processor.prepare(input: .init(prompt: prompt))
+            let input = try await context.processor.prepare(
+                input: .init(
+                    messages: [
+                        ["role": "system", "content": "You are a helpful assistant."],
+                        ["role": "user", "content": prompt],
+                    ], tools: includeWeatherTool ? [currentWeatherToolSpec] : nil))
             return try MLXLMCommon.generate(
                 input: input, parameters: generateParameters, context: context
             ) { tokens in
-                // update the output -- this will make the view show the text as it generates
+                // Show the text in the view as it generates
                 if tokens.count % displayEveryNTokens == 0 {
                     let text = context.tokenizer.decode(tokens: tokens)
                     Task { @MainActor in
                         self.output = text
                     }
                 }
-
                 if tokens.count >= maxTokens {
                     return .stop
                 } else {
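
For orientation, the sketch below lifts the new tool-use path out of the view code. It reuses only calls that appear in this diff (`UserInput(messages:tools:)`, `context.processor.prepare(input:)`, `MLXLMCommon.generate(input:parameters:context:)`, `GenerateParameters(temperature:)`); the `generateWithTool` wrapper, the token limit, and the `result.output` / `.more` names are assumptions for illustration, not part of the change.

    import MLXLMCommon

    /// Sketch: run one generation with a tool spec attached.
    /// `container` is an already-loaded ModelContainer; loading it is out of scope here.
    func generateWithTool(
        container: ModelContainer, prompt: String, toolSpec: [String: any Sendable]
    ) async throws -> String {
        let parameters = GenerateParameters(temperature: 0.6)

        return try await container.perform { context in
            // Same shape as LLMEvaluator above: chat messages plus an optional tool list.
            let input = try await context.processor.prepare(
                input: .init(
                    messages: [
                        ["role": "system", "content": "You are a helpful assistant."],
                        ["role": "user", "content": prompt],
                    ], tools: [toolSpec]))

            let result = try MLXLMCommon.generate(
                input: input, parameters: parameters, context: context
            ) { tokens in
                // `.stop` appears in the diff; `.more` is the assumed "keep going" case.
                tokens.count >= 240 ? .stop : .more
            }
            // Assumption: the generate result exposes the decoded text as `output`.
            return result.output
        }
    }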

Libraries/MLXLLM/LLMModelFactory.swift

Lines changed: 14 additions & 1 deletion
@@ -149,6 +149,16 @@ public class ModelRegistry: @unchecked Sendable {
         defaultPrompt: "why is the sky blue?"
     )
 
+    static public let qwen2_5_7b = ModelConfiguration(
+        id: "mlx-community/Qwen2.5-7B-Instruct-4bit",
+        defaultPrompt: "Why is the sky blue?"
+    )
+
+    static public let qwen2_5_1_5b = ModelConfiguration(
+        id: "mlx-community/Qwen2.5-1.5B-Instruct-4bit",
+        defaultPrompt: "Why is the sky blue?"
+    )
+
     static public let openelm270m4bit = ModelConfiguration(
         id: "mlx-community/OpenELM-270M-Instruct",
         // https://huggingface.co/apple/OpenELM
@@ -193,6 +203,8 @@ public class ModelRegistry: @unchecked Sendable {
         phi3_5_4bit,
         phi4bit,
         qwen205b4bit,
+        qwen2_5_7b,
+        qwen2_5_1_5b,
         smolLM_135M_4bit,
     ]
 }
@@ -229,7 +241,8 @@ private struct LLMUserInputProcessor: UserInputProcessor {
     func prepare(input: UserInput) throws -> LMInput {
         do {
             let messages = input.prompt.asMessages()
-            let promptTokens = try tokenizer.applyChatTemplate(messages: messages)
+            let promptTokens = try tokenizer.applyChatTemplate(
+                messages: messages, tools: input.tools, additionalContext: input.additionalContext)
             return LMInput(tokens: MLXArray(promptTokens))
         } catch {
             // #150 -- it might be a TokenizerError.chatTemplate("No chat template was specified")
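
Selecting one of the new configurations is a one-line change wherever a `ModelConfiguration` is expected, as the LLMEval diff above does with `ModelRegistry.qwen2_5_1_5b`. The more substantive piece is `prepare`, which now lets tool specs and additional context reach the tokenizer's chat template. A minimal sketch of that call in isolation, assuming a swift-transformers `Tokenizer` is already in hand and that `ToolSpec` comes from the Tokenizers module (as the UserInput diff below suggests):

    import MLX
    import MLXLMCommon
    import Tokenizers

    /// Sketch: render a chat template with tools, mirroring LLMUserInputProcessor.prepare.
    func tokenizeWithTools(
        tokenizer: any Tokenizer, messages: [[String: String]], tools: [ToolSpec]?
    ) throws -> LMInput {
        // swift-transformers 0.1.17 lets the chat template see the tool list and any
        // additional context values (none passed here).
        let promptTokens = try tokenizer.applyChatTemplate(
            messages: messages, tools: tools, additionalContext: nil)
        return LMInput(tokens: MLXArray(promptTokens))
    }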

Libraries/MLXLMCommon/UserInput.swift

Lines changed: 23 additions & 3 deletions
@@ -4,6 +4,7 @@ import AVFoundation
 import CoreImage
 import Foundation
 import MLX
+import Tokenizers
 
 /// Container for raw user input.
 ///
@@ -125,23 +126,42 @@ public struct UserInput: Sendable {
     public var prompt: Prompt
     public var images = [Image]()
     public var videos = [Video]()
+    public var tools: [ToolSpec]?
+    /// Additional values provided for the chat template rendering context
+    public var additionalContext: [String: Any]?
     public var processing: Processing = .init()
 
-    public init(prompt: String, images: [Image] = [Image](), videos: [Video] = [Video]()) {
+    public init(
+        prompt: String, images: [Image] = [Image](), videos: [Video] = [Video](),
+        tools: [ToolSpec]? = nil,
+        additionalContext: [String: Any]? = nil
+    ) {
         self.prompt = .text(prompt)
         self.images = images
         self.videos = videos
+        self.tools = tools
+        self.additionalContext = additionalContext
     }
 
-    public init(messages: [[String: String]], images: [Image] = [Image]()) {
+    public init(
+        messages: [[String: String]], images: [Image] = [Image](), tools: [ToolSpec]? = nil,
+        additionalContext: [String: Any]? = nil
+    ) {
         self.prompt = .messages(messages)
         self.images = images
+        self.tools = tools
+        self.additionalContext = additionalContext
     }
 
-    public init(prompt: Prompt, images: [Image] = [Image](), processing: Processing = .init()) {
+    public init(
+        prompt: Prompt, images: [Image] = [Image](), processing: Processing = .init(),
+        tools: [ToolSpec]? = nil, additionalContext: [String: Any]? = nil
+    ) {
         self.prompt = prompt
         self.images = images
         self.processing = processing
+        self.tools = tools
+        self.additionalContext = additionalContext
     }
 }
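
A short usage sketch for the extended initializers. The tool dictionary follows the same shape as LLMEval's `currentWeatherToolSpec` (abbreviated here), and the `additionalContext` key is a hypothetical template variable, not something defined by this change:

    import MLXLMCommon

    // Abbreviated tool spec in the same shape as LLMEval's currentWeatherToolSpec.
    let weatherTool: [String: any Sendable] = [
        "type": "function",
        "function": [
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
        ] as [String: String],
    ]

    // Plain-text prompt with a tool list attached.
    let textInput = UserInput(
        prompt: "What's the current weather in Paris?", tools: [weatherTool])

    // Chat-style messages, tools, and extra values for the chat template
    // ("time_of_day" is purely illustrative).
    let chatInput = UserInput(
        messages: [
            ["role": "system", "content": "You are a helpful assistant."],
            ["role": "user", "content": "What's the current weather in Paris?"],
        ],
        tools: [weatherTool],
        additionalContext: ["time_of_day": "morning"])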

Package.swift

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ let package = Package(
     dependencies: [
         .package(url: "https://github.com/ml-explore/mlx-swift", .upToNextMinor(from: "0.21.2")),
         .package(
-            url: "https://github.com/huggingface/swift-transformers", .upToNextMinor(from: "0.1.15")
+            url: "https://github.com/huggingface/swift-transformers", .upToNextMinor(from: "0.1.17")
         ),
         .package(
             url: "https://github.com/apple/swift-async-algorithms", .upToNextMinor(from: "1.0.0")),

mlx-swift-examples.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved

Lines changed: 18 additions & 9 deletions
Some generated files are not rendered by default.
