@@ -177,28 +177,39 @@ public class LanguageModelConfigurationFromHub {
177
177
modelName: String,
178
178
hubApi: HubApi = . shared
179
179
) async throws -> Configurations {
180
- let filesToDownload = [ " config.json " , " tokenizer_config.json " , " tokenizer.json " ]
180
+ let filesToDownload = [ " config.json " , " tokenizer_config.json " , " chat_template.json " , " tokenizer.json " ]
181
181
let repo = Hub . Repo ( id: modelName)
182
182
let downloadedModelFolder = try await hubApi. snapshot ( from: repo, matching: filesToDownload)
183
183
184
184
return try await loadConfig ( modelFolder: downloadedModelFolder, hubApi: hubApi)
185
185
}
186
-
186
+
187
187
/// Loads the model and tokenizer configurations stored in a local model folder.
///
/// `config.json` and `tokenizer.json` are required: an error while loading either one is
/// rethrown to the caller. `tokenizer_config.json` is optional (it does not exist in all
/// models). When `chat_template.json` can be loaded and its `chat_template` entry is a
/// string, that template is merged into the tokenizer configuration, replacing any
/// `chat_template` entry already present there.
///
/// - Parameters:
///   - modelFolder: Folder that contains the configuration files.
///   - hubApi: API object used to read each configuration file. Defaults to `.shared`.
/// - Returns: The `Configurations` built from the model config, the (possibly merged)
///   tokenizer config, and the tokenizer data.
/// - Throws: Any error raised while loading `config.json` or `tokenizer.json`.
func loadConfig(
    modelFolder: URL,
    hubApi: HubApi = .shared
) async throws -> Configurations {
    // Required configurations: failures propagate.
    let modelConfig = try hubApi.configuration(fileURL: modelFolder.appending(path: "config.json"))
    let tokenizerData = try hubApi.configuration(fileURL: modelFolder.appending(path: "tokenizer.json"))

    // Optional tokenizer config: `try?` leaves this nil when the file cannot be loaded.
    var tokenizerConfig = try? hubApi.configuration(fileURL: modelFolder.appending(path: "tokenizer_config.json"))

    // Merge a standalone chat template, if one is available.
    // The value of chat_template could also be an array of strings, but that case is not
    // handled here, since it's discouraged.
    if let chatTemplateConfig = try? hubApi.configuration(fileURL: modelFolder.appending(path: "chat_template.json")),
       let chatTemplate = chatTemplateConfig.chatTemplate?.stringValue
    {
        // Start from the existing tokenizer config entries, or from an empty dictionary
        // when there is no tokenizer config, then set (or overwrite) the template.
        var mergedEntries = tokenizerConfig?.dictionary ?? [:]
        mergedEntries["chat_template"] = chatTemplate
        tokenizerConfig = Config(mergedEntries)
    }

    return Configurations(
        modelConfig: modelConfig,
        tokenizerConfig: tokenizerConfig,
        tokenizerData: tokenizerData
    )
}
203
214
204
215
static func fallbackTokenizerConfig( for modelType: String) - > Config? {
0 commit comments