1
1
//
2
2
// Hub.swift
3
- //
3
+ //
4
4
//
5
5
// Created by Pedro Cuenca on 18/5/23.
6
6
//
@@ -10,23 +10,78 @@ import Foundation
10
10
/// Empty namespace type. Hub-related nested types (`HubClientError`, `RepoType`, `Repo`)
/// are attached to it through the `public extension Hub` that follows.
public struct Hub { }
11
11
12
12
public extension Hub {
13
/// Errors reported while communicating with the Hugging Face Hub or while
/// loading resources obtained from it.
///
/// The type conforms to `LocalizedError`; `errorDescription` supplies a
/// human-readable message for every case.
enum HubClientError: LocalizedError {
    case parseError(String? = nil)
    case authorizationRequired
    case networkError(Error)
    case resourceNotFound(String)
    case rateLimitExceeded
    case serverError(Int, String? = nil)
    case clientError(Int, String? = nil)
    case invalidRequest(String)
    case fileSystemError(Error)
    case configurationMissing(String)
    case unsupportedModelType(String)
    case tokenizationError(String)
    case httpStatusCode(Int)

    /// Joins an optional detail string onto a base message, separated by one space.
    private static func message(_ base: String, with details: String?) -> String {
        if let extra = details {
            return base + " " + extra
        }
        return base
    }

    public var errorDescription: String? {
        switch self {
        case let .parseError(details):
            return Self.message(
                String(localized: "Failed to parse the response from Hugging Face Hub.", comment: "Error when parsing Hub response"),
                with: details
            )

        case .authorizationRequired:
            return String(localized: "Authorization is required to access this resource on Hugging Face Hub.", comment: "Error when authorization is needed")

        case let .networkError(underlying):
            return String(localized: "Network error while communicating with Hugging Face Hub: \(underlying.localizedDescription)", comment: "Network error message")

        case let .resourceNotFound(name):
            return String(localized: "The requested resource '\(name)' was not found on Hugging Face Hub.", comment: "Resource not found error")

        case .rateLimitExceeded:
            return String(localized: "Rate limit exceeded for Hugging Face Hub API. Please try again later.", comment: "Rate limit error")

        case let .serverError(status, details):
            return Self.message(
                String(localized: "The Hugging Face Hub server encountered an error (code: \(status)).", comment: "Server error with status code"),
                with: details
            )

        case let .clientError(status, details):
            return Self.message(
                String(localized: "Client error when accessing Hugging Face Hub (code: \(status)).", comment: "Client error with status code"),
                with: details
            )

        case let .invalidRequest(why):
            return String(localized: "Invalid request to Hugging Face Hub: \(why)", comment: "Invalid request error")

        case let .fileSystemError(underlying):
            return String(localized: "File system error while handling Hub resources: \(underlying.localizedDescription)", comment: "File system error")

        case let .configurationMissing(fileName):
            return String(localized: "Required configuration file '\(fileName)' is missing.", comment: "Missing configuration file")

        case let .unsupportedModelType(modelType):
            return String(localized: "The model type '\(modelType)' is not supported.", comment: "Unsupported model type")

        case let .tokenizationError(info):
            return String(localized: "Error during tokenization: \(info)", comment: "Tokenization error")

        case let .httpStatusCode(status):
            return String(localized: "The server returned an error with status code: \(status).", comment: "HTTP error with status code")
        }
    }
}
19
-
74
+
20
75
/// Kinds of repository; each case's raw string value is the case name itself.
enum RepoType: String {
    case models, datasets, spaces
}
25
-
80
+
26
81
struct Repo {
27
82
public let id : String
28
83
public let type : RepoType
29
-
84
+
30
85
public init ( id: String , type: RepoType = . models) {
31
86
self . id = id
32
87
self . type = type
@@ -51,11 +106,11 @@ public struct Config {
51
106
. map { $0. offset == 0 ? $0. element. lowercased ( ) : $0. element. capitalized }
52
107
. joined ( )
53
108
}
54
-
109
+
55
110
func uncamelCase( _ string: String ) -> String {
56
111
let scalars = string. unicodeScalars
57
112
var result = " "
58
-
113
+
59
114
var previousCharacterIsLowercase = false
60
115
for scalar in scalars {
61
116
if CharacterSet . uppercaseLetters. contains ( scalar) {
@@ -70,7 +125,7 @@ public struct Config {
70
125
previousCharacterIsLowercase = true
71
126
}
72
127
}
73
-
128
+
74
129
return result
75
130
}
76
131
@@ -88,17 +143,17 @@ public struct Config {
88
143
/// The entry stored under the `"value"` key of the backing dictionary, if present.
public var value: Any? {
    dictionary["value"]
}

/// `value` as an `Int`, or `nil` when it is absent or of another type.
public var intValue: Int? {
    guard let number = value as? Int else { return nil }
    return number
}

/// `value` as a `Bool`, or `nil` when it is absent or of another type.
public var boolValue: Bool? {
    guard let flag = value as? Bool else { return nil }
    return flag
}

/// `value` as a `String`, or `nil` when it is absent or of another type.
public var stringValue: String? {
    guard let text = value as? String else { return nil }
    return text
}
95
-
150
+
96
151
// Instead of doing this we could provide custom classes and decode to them
/// `value` as a list of nested `Config` objects.
///
/// Returns `nil` when `value` is not an array, or when any element of the array
/// is not a `[NSString: Any]` dictionary (previously such an element caused a
/// crash through a forced cast).
public var arrayValue: [Config]? {
    guard let list = value as? [Any] else { return nil }

    var configs: [Config] = []
    configs.reserveCapacity(list.count)
    for element in list {
        // A conditional cast keeps malformed input from trapping at runtime.
        guard let dictionary = element as? [NSString: Any] else { return nil }
        configs.append(Config(dictionary))
    }
    return configs
}
101
-
156
+
102
157
/// Tuple of token identifier and string value, or `nil` when `value` cannot be
/// cast to `(UInt, String)`.
public var tokenValue: (UInt, String)? {
    guard let pair = value as? (UInt, String) else { return nil }
    return pair
}
104
159
}
@@ -120,7 +175,7 @@ public class LanguageModelConfigurationFromHub {
120
175
return try await self . loadConfig ( modelName: modelName, hubApi: hubApi)
121
176
}
122
177
}
123
-
178
+
124
179
public init (
125
180
modelFolder: URL ,
126
181
hubApi: HubApi = . shared
@@ -179,47 +234,104 @@ public class LanguageModelConfigurationFromHub {
179
234
) async throws -> Configurations {
180
235
let filesToDownload = [ " config.json " , " tokenizer_config.json " , " chat_template.json " , " tokenizer.json " ]
181
236
let repo = Hub . Repo ( id: modelName)
182
- let downloadedModelFolder = try await hubApi. snapshot ( from: repo, matching: filesToDownload)
183
237
184
- return try await loadConfig ( modelFolder: downloadedModelFolder, hubApi: hubApi)
238
+ do {
239
+ let downloadedModelFolder = try await hubApi. snapshot ( from: repo, matching: filesToDownload)
240
+ return try await loadConfig ( modelFolder: downloadedModelFolder, hubApi: hubApi)
241
+ } catch {
242
+ // Convert generic errors to more specific ones
243
+ if let urlError = error as? URLError {
244
+ switch urlError. code {
245
+ case . notConnectedToInternet, . networkConnectionLost:
246
+ throw Hub . HubClientError. networkError ( urlError)
247
+ case . resourceUnavailable:
248
+ throw Hub . HubClientError. resourceNotFound ( modelName)
249
+ default :
250
+ throw Hub . HubClientError. networkError ( urlError)
251
+ }
252
+ } else {
253
+ throw error
254
+ }
255
+ }
185
256
}
186
257
187
258
/// Loads the model and tokenizer configuration files found in `modelFolder`.
///
/// `config.json` and `tokenizer.json` are required. `tokenizer_config.json` and
/// `chat_template.json` are optional; when a chat template string is available it is
/// stored in the tokenizer configuration under the `chat_template` key.
///
/// - Parameters:
///   - modelFolder: Local folder that contains the configuration files.
///   - hubApi: API object used to read each configuration file.
/// - Returns: The model config, the (possibly absent) tokenizer config and the tokenizer data.
/// - Throws: `Hub.HubClientError.configurationMissing` when a required file does not exist,
///   `Hub.HubClientError.parseError` when a file holds malformed JSON, any
///   `Hub.HubClientError` raised while reading is rethrown unchanged, and every other
///   failure is wrapped in `Hub.HubClientError.fileSystemError`.
func loadConfig(
    modelFolder: URL,
    hubApi: HubApi = .shared
) async throws -> Configurations {
    do {
        // Load required configurations
        let modelConfigURL = modelFolder.appending(path: "config.json")
        guard FileManager.default.fileExists(atPath: modelConfigURL.path) else {
            throw Hub.HubClientError.configurationMissing("config.json")
        }
        let modelConfig = try hubApi.configuration(fileURL: modelConfigURL)

        let tokenizerDataURL = modelFolder.appending(path: "tokenizer.json")
        guard FileManager.default.fileExists(atPath: tokenizerDataURL.path) else {
            throw Hub.HubClientError.configurationMissing("tokenizer.json")
        }
        let tokenizerData = try hubApi.configuration(fileURL: tokenizerDataURL)

        // Load tokenizer config (optional): a missing file is fine, an unreadable one is an error.
        var tokenizerConfig: Config? = nil
        let tokenizerConfigURL = modelFolder.appending(path: "tokenizer_config.json")
        if FileManager.default.fileExists(atPath: tokenizerConfigURL.path) {
            tokenizerConfig = try hubApi.configuration(fileURL: tokenizerConfigURL)
        }

        // Check for chat template and merge if available. This step is best-effort:
        // a chat template file that cannot be read is ignored (`try?`).
        // Only a string-valued chat_template is handled here, not an array of strings.
        let chatTemplateURL = modelFolder.appending(path: "chat_template.json")
        if FileManager.default.fileExists(atPath: chatTemplateURL.path),
           let chatTemplateConfig = try? hubApi.configuration(fileURL: chatTemplateURL),
           let chatTemplate = chatTemplateConfig.chatTemplate?.stringValue {
            // Create or update tokenizer config with chat template
            if var configDict = tokenizerConfig?.dictionary {
                configDict["chat_template"] = chatTemplate
                tokenizerConfig = Config(configDict)
            } else {
                tokenizerConfig = Config(["chat_template": chatTemplate])
            }
        }

        return Configurations(
            modelConfig: modelConfig,
            tokenizerConfig: tokenizerConfig,
            tokenizerData: tokenizerData
        )
    } catch let error as Hub.HubClientError {
        // Already a domain error (e.g. configurationMissing): pass it through untouched.
        throw error
    } catch {
        let nsError = error as NSError
        // JSONSerialization reports malformed JSON in NSCocoaErrorDomain with code 3840
        // (CocoaError.Code.propertyListReadCorrupt); there is no "NSJSONSerialization"
        // error domain, so the previous domain comparison never matched.
        // NOTE(review): assumes hubApi.configuration parses with JSONSerialization — confirm.
        if nsError.domain == NSCocoaErrorDomain,
           nsError.code == CocoaError.Code.propertyListReadCorrupt.rawValue {
            throw Hub.HubClientError.parseError("Invalid JSON format: \(nsError.localizedDescription)")
        }
        throw Hub.HubClientError.fileSystemError(error)
    }
}
214
317
215
318
/// Looks up the bundled `<modelType>_tokenizer_config.json` resource and returns it as a `Config`.
///
/// - Parameter modelType: Model type used as the prefix of the resource name.
/// - Returns: The parsed configuration, or `nil` when the resource does not exist in
///   `Bundle.module` or when reading/parsing it fails (the failure is printed).
static func fallbackTokenizerConfig(for modelType: String) -> Config? {
    let resourceName = "\(modelType)_tokenizer_config"
    guard let resourceURL = Bundle.module.url(forResource: resourceName, withExtension: "json") else {
        return nil
    }

    do {
        let contents = try Data(contentsOf: resourceURL)
        let json = try JSONSerialization.jsonObject(with: contents, options: [])
        if let dictionary = json as? [NSString: Any] {
            return Config(dictionary)
        }
        // A top-level value that is not a dictionary is reported through the same path as other failures.
        throw Hub.HubClientError.parseError("Failed to parse fallback tokenizer config")
    } catch {
        print("Error loading fallback tokenizer config: \(error.localizedDescription)")
        return nil
    }
}
0 commit comments