1
1
//
2
2
// Hub.swift
3
- //
3
+ //
4
4
//
5
5
// Created by Pedro Cuenca on 18/5/23.
6
6
//
@@ -10,23 +10,57 @@ import Foundation
10
10
public struct Hub { }
11
11
12
12
public extension Hub {
13
/// Errors reported by the Hub client. Each case supplies a user-facing
/// message through `LocalizedError.errorDescription`.
enum HubClientError: LocalizedError {
    /// Authentication is required (a valid Hugging Face token was not provided).
    case authorizationRequired
    /// An HTTP error identified by its status code.
    case httpStatusCode(Int)
    /// The server response could not be parsed.
    case parse
    /// A failure that does not fit any other case.
    case unexpectedError
    /// A download failed; the payload is a free-form message.
    case downloadError(String)
    /// A file was not found; the payload is the file name.
    case fileNotFound(String)
    /// A network failure, carrying the underlying `URLError`.
    case networkError(URLError)
    /// A resource was not found; the payload names the resource.
    case resourceNotFound(String)
    /// A required configuration file is missing; the payload is the file name.
    case configurationMissing(String)
    /// A file-system failure, carrying the underlying error.
    case fileSystemError(Error)
    /// Content could not be parsed; the payload is a free-form message.
    case parseError(String)

    /// Localized, human-readable description of the error.
    public var errorDescription: String? {
        switch self {
        case .authorizationRequired:
            return String(localized: "Authentication required. Please provide a valid Hugging Face token.")
        case .httpStatusCode(let code):
            return String(localized: "HTTP error with status code: \(code)")
        case .parse:
            return String(localized: "Failed to parse server response.")
        case .unexpectedError:
            return String(localized: "An unexpected error occurred.")
        case .downloadError(let message):
            return String(localized: "Download failed: \(message)")
        case .fileNotFound(let filename):
            return String(localized: "File not found: \(filename)")
        case .networkError(let error):
            return String(localized: "Network error: \(error.localizedDescription)")
        case .resourceNotFound(let resource):
            return String(localized: "Resource not found: \(resource)")
        case .configurationMissing(let file):
            return String(localized: "Required configuration file missing: \(file)")
        case .fileSystemError(let error):
            return String(localized: "File system error: \(error.localizedDescription)")
        case .parseError(let message):
            return String(localized: "Parse error: \(message)")
        }
    }
}
19
-
53
+
20
54
/// Kind of repository a `Repo` points at. The raw value of each case is its
/// own name (`"models"`, `"datasets"`, `"spaces"`).
enum RepoType: String {
    case models, datasets, spaces
}
25
-
59
+
26
60
struct Repo {
27
61
public let id : String
28
62
public let type : RepoType
29
-
63
+
30
64
public init ( id: String , type: RepoType = . models) {
31
65
self . id = id
32
66
self . type = type
@@ -51,11 +85,11 @@ public struct Config {
51
85
. map { $0. offset == 0 ? $0. element. lowercased ( ) : $0. element. capitalized }
52
86
. joined ( )
53
87
}
54
-
88
+
55
89
func uncamelCase( _ string: String ) -> String {
56
90
let scalars = string. unicodeScalars
57
91
var result = " "
58
-
92
+
59
93
var previousCharacterIsLowercase = false
60
94
for scalar in scalars {
61
95
if CharacterSet . uppercaseLetters. contains ( scalar) {
@@ -70,7 +104,7 @@ public struct Config {
70
104
previousCharacterIsLowercase = true
71
105
}
72
106
}
73
-
107
+
74
108
return result
75
109
}
76
110
@@ -88,17 +122,17 @@ public struct Config {
88
122
/// The entry stored under the `"value"` key of the backing dictionary,
/// or `nil` when there is no such entry.
public var value: Any? {
    // The key must be exactly "value"; a literal with surrounding spaces would never match.
    dictionary["value"]
}
91
-
125
+
92
126
/// `value` as an `Int`, or `nil` when it is absent or of another type.
public var intValue: Int? {
    return value as? Int
}

/// `value` as a `Bool`, or `nil` when it is absent or of another type.
public var boolValue: Bool? {
    return value as? Bool
}

/// `value` as a `String`, or `nil` when it is absent or of another type.
public var stringValue: String? {
    return value as? String
}
95
-
129
+
96
130
// Instead of doing this we could provide custom classes and decode to them
97
131
/// `value` interpreted as a list of nested configurations.
///
/// - Returns: One `Config` per element, or `nil` when `value` is not an
///   array or when any element is not a dictionary.
public var arrayValue: [Config]? {
    guard let list = value as? [Any] else { return nil }

    var configs: [Config] = []
    configs.reserveCapacity(list.count)
    for element in list {
        // A conditional cast replaces the former `as!`, which crashed on
        // arrays containing anything other than dictionaries.
        guard let dictionary = element as? [NSString: Any] else { return nil }
        configs.append(Config(dictionary))
    }
    return configs
}
101
-
135
+
102
136
/// Token identifier paired with its string value, or `nil` when `value`
/// is not such a tuple.
public var tokenValue: (UInt, String)? {
    return value as? (UInt, String)
}
104
138
}
@@ -120,7 +154,7 @@ public class LanguageModelConfigurationFromHub {
120
154
return try await self . loadConfig ( modelName: modelName, hubApi: hubApi)
121
155
}
122
156
}
123
-
157
+
124
158
public init (
125
159
modelFolder: URL ,
126
160
hubApi: HubApi = . shared
@@ -179,47 +213,104 @@ public class LanguageModelConfigurationFromHub {
179
213
) async throws -> Configurations {
180
214
let filesToDownload = [ " config.json " , " tokenizer_config.json " , " chat_template.json " , " tokenizer.json " ]
181
215
let repo = Hub . Repo ( id: modelName)
182
- let downloadedModelFolder = try await hubApi. snapshot ( from: repo, matching: filesToDownload)
183
216
184
- return try await loadConfig ( modelFolder: downloadedModelFolder, hubApi: hubApi)
217
+ do {
218
+ let downloadedModelFolder = try await hubApi. snapshot ( from: repo, matching: filesToDownload)
219
+ return try await loadConfig ( modelFolder: downloadedModelFolder, hubApi: hubApi)
220
+ } catch {
221
+ // Convert generic errors to more specific ones
222
+ if let urlError = error as? URLError {
223
+ switch urlError. code {
224
+ case . notConnectedToInternet, . networkConnectionLost:
225
+ throw Hub . HubClientError. networkError ( urlError)
226
+ case . resourceUnavailable:
227
+ throw Hub . HubClientError. resourceNotFound ( modelName)
228
+ default :
229
+ throw Hub . HubClientError. networkError ( urlError)
230
+ }
231
+ } else {
232
+ throw error
233
+ }
234
+ }
185
235
}
186
236
187
237
/// Loads the model and tokenizer configuration files found in a local folder.
///
/// `config.json` and `tokenizer.json` are required. `tokenizer_config.json`
/// is loaded only when present, and a `chat_template.json` file, when present
/// and readable, has its template merged into the tokenizer configuration
/// under the `chat_template` key.
///
/// - Parameters:
///   - modelFolder: Folder expected to contain the configuration files.
///   - hubApi: Client whose `configuration(fileURL:)` reads each file.
/// - Returns: The loaded `Configurations`.
/// - Throws: `Hub.HubClientError.configurationMissing` when a required file
///   does not exist; `Hub.HubClientError.parseError` when Foundation reports
///   corrupt JSON; any other `Hub.HubClientError` unchanged; and
///   `Hub.HubClientError.fileSystemError` wrapping every other failure.
func loadConfig(
    modelFolder: URL,
    hubApi: HubApi = .shared
) async throws -> Configurations {
    // Resolves a file that must exist, failing with `configurationMissing` otherwise.
    func requiredFileURL(_ fileName: String) throws -> URL {
        let fileURL = modelFolder.appending(path: fileName)
        guard FileManager.default.fileExists(atPath: fileURL.path) else {
            throw Hub.HubClientError.configurationMissing(fileName)
        }
        return fileURL
    }

    do {
        // Required configurations.
        let modelConfig = try hubApi.configuration(fileURL: requiredFileURL("config.json"))
        let tokenizerData = try hubApi.configuration(fileURL: requiredFileURL("tokenizer.json"))

        // Optional tokenizer config: absence is fine, but a file that exists and fails to load is an error.
        var tokenizerConfig: Config? = nil
        let tokenizerConfigURL = modelFolder.appending(path: "tokenizer_config.json")
        if FileManager.default.fileExists(atPath: tokenizerConfigURL.path) {
            tokenizerConfig = try hubApi.configuration(fileURL: tokenizerConfigURL)
        }

        // Optional chat template, merged on a best-effort basis (`try?`).
        // The value of chat_template could also be an array of strings, but that
        // case is not handled here, since it's discouraged.
        let chatTemplateURL = modelFolder.appending(path: "chat_template.json")
        if FileManager.default.fileExists(atPath: chatTemplateURL.path),
           let chatTemplateConfig = try? hubApi.configuration(fileURL: chatTemplateURL),
           let chatTemplate = chatTemplateConfig.chatTemplate?.stringValue {
            // Create or update the tokenizer config with the chat template.
            if var configDict = tokenizerConfig?.dictionary {
                configDict["chat_template"] = chatTemplate
                tokenizerConfig = Config(configDict)
            } else {
                tokenizerConfig = Config(["chat_template": chatTemplate])
            }
        }

        return Configurations(
            modelConfig: modelConfig,
            tokenizerConfig: tokenizerConfig,
            tokenizerData: tokenizerData
        )
    } catch let error as Hub.HubClientError {
        // Already specific; pass through untouched.
        throw error
    } catch {
        // Foundation's JSON parser reports malformed input in the Cocoa error
        // domain with the "property list read corrupt" code (3840), not under a
        // domain named "NSJSONSerialization".
        // NOTE(review): assumes `hubApi.configuration(fileURL:)` parses with Foundation — confirm.
        let nsError = error as NSError
        if nsError.domain == NSCocoaErrorDomain,
           nsError.code == CocoaError.Code.propertyListReadCorrupt.rawValue {
            throw Hub.HubClientError.parseError("Invalid JSON format: \(nsError.localizedDescription)")
        }
        throw Hub.HubClientError.fileSystemError(error)
    }
}
214
296
215
297
/// Loads the fallback tokenizer configuration bundled for a model type.
///
/// Looks up the resource `<modelType>_tokenizer_config.json` in the module
/// bundle and parses it as a JSON object.
///
/// - Parameter modelType: Prefix of the bundled resource name.
/// - Returns: The parsed `Config`, or `nil` when the resource does not exist,
///   cannot be read, is not valid JSON, or is not a JSON object. Failures
///   after the resource lookup are printed before returning `nil`.
static func fallbackTokenizerConfig(for modelType: String) -> Config? {
    guard let url = Bundle.module.url(forResource: "\(modelType)_tokenizer_config", withExtension: "json") else {
        return nil
    }

    do {
        let data = try Data(contentsOf: url)
        let parsed = try JSONSerialization.jsonObject(with: data, options: [])
        guard let dictionary = parsed as? [NSString: Any] else {
            throw Hub.HubClientError.parseError("Failed to parse fallback tokenizer config")
        }
        return Config(dictionary)
    } catch {
        // One handler covers read, JSON and shape failures alike: they are all
        // reported the same way and all yield `nil`.
        print("Error loading fallback tokenizer config: \(error.localizedDescription)")
        return nil
    }
}
0 commit comments