Skip to content

Commit 5594cf9

Browse files
committed
Improve error handling and add error messages
1 parent 1a926a2 commit 5594cf9

File tree

5 files changed

+200
-42
lines changed

5 files changed

+200
-42
lines changed

Sources/Hub/Hub.swift

Lines changed: 150 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//
22
// Hub.swift
3-
//
3+
//
44
//
55
// Created by Pedro Cuenca on 18/5/23.
66
//
@@ -10,23 +10,78 @@ import Foundation
1010
public struct Hub {}
1111

1212
public extension Hub {
13-
enum HubClientError: Error {
14-
case parse
13+
/// Errors reported by the Hub client.
///
/// Conforms to `LocalizedError`, so `localizedDescription` yields the
/// messages produced by `errorDescription` below.
enum HubClientError: LocalizedError {
    /// A response could not be parsed; the optional string adds detail.
    case parseError(String? = nil)
    /// The resource requires authorization.
    case authorizationRequired
    /// A network failure, wrapping the underlying error.
    case networkError(Error)
    /// The named resource was not found.
    case resourceNotFound(String)
    /// The API rate limit was exceeded.
    case rateLimitExceeded
    /// A server-side error with its status code and optional detail.
    case serverError(Int, String? = nil)
    /// A client-side error with its status code and optional detail.
    case clientError(Int, String? = nil)
    /// The request was invalid; the string gives the reason.
    case invalidRequest(String)
    /// A file system failure, wrapping the underlying error.
    case fileSystemError(Error)
    /// The named configuration file is missing.
    case configurationMissing(String)
    /// The named model type is not supported.
    case unsupportedModelType(String)
    /// Tokenization failed; the string adds detail.
    case tokenizationError(String)
    /// An HTTP error identified only by its status code.
    case httpStatusCode(Int)

    /// Joins a base message and optional details with a single space.
    /// Returns the base message unchanged when `details` is `nil`.
    private static func describe(_ baseMessage: String, details: String?) -> String {
        guard let details else { return baseMessage }
        return baseMessage + " " + details
    }

    /// Human-readable description of the error.
    public var errorDescription: String? {
        switch self {
        case .parseError(let details):
            return Self.describe(
                String(localized: "Failed to parse the response from Hugging Face Hub.", comment: "Error when parsing Hub response"),
                details: details
            )

        case .authorizationRequired:
            return String(localized: "Authorization is required to access this resource on Hugging Face Hub.", comment: "Error when authorization is needed")

        case .networkError(let error):
            return String(localized: "Network error while communicating with Hugging Face Hub: \(error.localizedDescription)", comment: "Network error message")

        case .resourceNotFound(let resource):
            return String(localized: "The requested resource '\(resource)' was not found on Hugging Face Hub.", comment: "Resource not found error")

        case .rateLimitExceeded:
            return String(localized: "Rate limit exceeded for Hugging Face Hub API. Please try again later.", comment: "Rate limit error")

        case .serverError(let code, let details):
            return Self.describe(
                String(localized: "The Hugging Face Hub server encountered an error (code: \(code)).", comment: "Server error with status code"),
                details: details
            )

        case .clientError(let code, let details):
            return Self.describe(
                String(localized: "Client error when accessing Hugging Face Hub (code: \(code)).", comment: "Client error with status code"),
                details: details
            )

        case .invalidRequest(let reason):
            return String(localized: "Invalid request to Hugging Face Hub: \(reason)", comment: "Invalid request error")

        case .fileSystemError(let error):
            return String(localized: "File system error while handling Hub resources: \(error.localizedDescription)", comment: "File system error")

        case .configurationMissing(let file):
            return String(localized: "Required configuration file '\(file)' is missing.", comment: "Missing configuration file")

        case .unsupportedModelType(let type):
            return String(localized: "The model type '\(type)' is not supported.", comment: "Unsupported model type")

        case .tokenizationError(let details):
            return String(localized: "Error during tokenization: \(details)", comment: "Tokenization error")

        case .httpStatusCode(let code):
            return String(localized: "The server returned an error with status code: \(code).", comment: "HTTP error with status code")
        }
    }
}
19-
74+
2075
/// Kind of repository. Each case's raw value is its own name
/// (`"models"`, `"datasets"`, `"spaces"`).
enum RepoType: String {
    case models, datasets, spaces
}
25-
80+
2681
struct Repo {
2782
public let id: String
2883
public let type: RepoType
29-
84+
3085
public init(id: String, type: RepoType = .models) {
3186
self.id = id
3287
self.type = type
@@ -51,11 +106,11 @@ public struct Config {
51106
.map { $0.offset == 0 ? $0.element.lowercased() : $0.element.capitalized }
52107
.joined()
53108
}
54-
109+
55110
func uncamelCase(_ string: String) -> String {
56111
let scalars = string.unicodeScalars
57112
var result = ""
58-
113+
59114
var previousCharacterIsLowercase = false
60115
for scalar in scalars {
61116
if CharacterSet.uppercaseLetters.contains(scalar) {
@@ -70,7 +125,7 @@ public struct Config {
70125
previousCharacterIsLowercase = true
71126
}
72127
}
73-
128+
74129
return result
75130
}
76131

@@ -88,17 +143,17 @@ public struct Config {
88143
/// The raw entry stored under the `"value"` key of the backing dictionary, if any.
public var value: Any? {
    return dictionary["value"]
}

/// `value` as an `Int`, or `nil` when it is absent or of another type.
public var intValue: Int? { value as? Int }
/// `value` as a `Bool`, or `nil` when it is absent or of another type.
public var boolValue: Bool? { value as? Bool }
/// `value` as a `String`, or `nil` when it is absent or of another type.
public var stringValue: String? { value as? String }

// Instead of doing this we could provide custom classes and decode to them
/// `value` as a list of nested configurations, or `nil` when it is not an array.
///
/// Elements that are not `[NSString: Any]` dictionaries are skipped. The
/// previous force cast crashed on such elements.
public var arrayValue: [Config]? {
    guard let list = value as? [Any] else { return nil }
    return list.compactMap { element -> Config? in
        guard let entry = element as? [NSString: Any] else { return nil }
        return Config(entry)
    }
}

/// Tuple of token identifier and string value
public var tokenValue: (UInt, String)? { value as? (UInt, String) }
104159
}
@@ -120,7 +175,7 @@ public class LanguageModelConfigurationFromHub {
120175
return try await self.loadConfig(modelName: modelName, hubApi: hubApi)
121176
}
122177
}
123-
178+
124179
public init(
125180
modelFolder: URL,
126181
hubApi: HubApi = .shared
@@ -179,47 +234,104 @@ public class LanguageModelConfigurationFromHub {
179234
) async throws -> Configurations {
180235
let filesToDownload = ["config.json", "tokenizer_config.json", "chat_template.json", "tokenizer.json"]
181236
let repo = Hub.Repo(id: modelName)
182-
let downloadedModelFolder = try await hubApi.snapshot(from: repo, matching: filesToDownload)
183237

184-
return try await loadConfig(modelFolder: downloadedModelFolder, hubApi: hubApi)
238+
do {
239+
let downloadedModelFolder = try await hubApi.snapshot(from: repo, matching: filesToDownload)
240+
return try await loadConfig(modelFolder: downloadedModelFolder, hubApi: hubApi)
241+
} catch {
242+
// Convert generic errors to more specific ones
243+
if let urlError = error as? URLError {
244+
switch urlError.code {
245+
case .notConnectedToInternet, .networkConnectionLost:
246+
throw Hub.HubClientError.networkError(urlError)
247+
case .resourceUnavailable:
248+
throw Hub.HubClientError.resourceNotFound(modelName)
249+
default:
250+
throw Hub.HubClientError.networkError(urlError)
251+
}
252+
} else {
253+
throw error
254+
}
255+
}
185256
}
186257

187258
/// Loads the configuration files found in a local model folder.
///
/// `config.json` and `tokenizer.json` are required. `tokenizer_config.json`
/// and `chat_template.json` are optional. When a chat template string is
/// present it is merged into the tokenizer configuration under the
/// `"chat_template"` key.
///
/// - Parameters:
///   - modelFolder: Folder that contains the configuration files.
///   - hubApi: API object used to read each file via `configuration(fileURL:)`.
/// - Returns: The model configuration, the optional tokenizer configuration
///   and the tokenizer data.
/// - Throws: `Hub.HubClientError.configurationMissing` when a required file
///   does not exist, `.parseError` when a file contains malformed JSON, and
///   `.fileSystemError` for any other failure. A `Hub.HubClientError` thrown
///   while loading is rethrown unchanged.
func loadConfig(
    modelFolder: URL,
    hubApi: HubApi = .shared
) async throws -> Configurations {
    do {
        // Load required configurations
        let modelConfigURL = modelFolder.appending(path: "config.json")
        guard FileManager.default.fileExists(atPath: modelConfigURL.path) else {
            throw Hub.HubClientError.configurationMissing("config.json")
        }
        let modelConfig = try hubApi.configuration(fileURL: modelConfigURL)

        let tokenizerDataURL = modelFolder.appending(path: "tokenizer.json")
        guard FileManager.default.fileExists(atPath: tokenizerDataURL.path) else {
            throw Hub.HubClientError.configurationMissing("tokenizer.json")
        }
        let tokenizerData = try hubApi.configuration(fileURL: tokenizerDataURL)

        // Load tokenizer config (optional): a missing file is fine, but a
        // file that exists and cannot be read is an error.
        var tokenizerConfig: Config?
        let tokenizerConfigURL = modelFolder.appending(path: "tokenizer_config.json")
        if FileManager.default.fileExists(atPath: tokenizerConfigURL.path) {
            tokenizerConfig = try hubApi.configuration(fileURL: tokenizerConfigURL)
        }

        // Check for chat template and merge if available. This is best
        // effort: an unreadable chat template file is ignored.
        // The value of chat_template could also be an array of strings, but
        // we're not handling that case here, since it's discouraged.
        let chatTemplateURL = modelFolder.appending(path: "chat_template.json")
        if FileManager.default.fileExists(atPath: chatTemplateURL.path),
           let chatTemplateConfig = try? hubApi.configuration(fileURL: chatTemplateURL),
           let chatTemplate = chatTemplateConfig.chatTemplate?.stringValue {
            // Create or update tokenizer config with chat template
            if var configDict = tokenizerConfig?.dictionary {
                configDict["chat_template"] = chatTemplate
                tokenizerConfig = Config(configDict)
            } else {
                tokenizerConfig = Config(["chat_template": chatTemplate])
            }
        }

        return Configurations(
            modelConfig: modelConfig,
            tokenizerConfig: tokenizerConfig,
            tokenizerData: tokenizerData
        )
    } catch let error as Hub.HubClientError {
        // Already specific; pass through untouched.
        throw error
    } catch {
        let nsError = error as NSError
        // Foundation reports malformed JSON in the Cocoa error domain with
        // the `propertyListReadCorrupt` code (3840). The legacy domain string
        // is still accepted so inputs matched before keep matching.
        let isMalformedJSON = nsError.domain == NSCocoaErrorDomain
            && nsError.code == CocoaError.Code.propertyListReadCorrupt.rawValue
        if isMalformedJSON || nsError.domain == "NSJSONSerialization" {
            throw Hub.HubClientError.parseError("Invalid JSON format: \(nsError.localizedDescription)")
        }
        throw Hub.HubClientError.fileSystemError(error)
    }
}
214317

215318
/// Loads the bundled fallback tokenizer configuration for a model type.
///
/// Looks up the resource `"<modelType>_tokenizer_config.json"` in
/// `Bundle.module`.
///
/// - Parameter modelType: Model type used to build the resource name.
/// - Returns: The parsed configuration. Returns `nil` when the resource does
///   not exist, cannot be read, or is not a JSON object. Read and parse
///   failures are printed before returning `nil`.
static func fallbackTokenizerConfig(for modelType: String) -> Config? {
    guard let url = Bundle.module.url(forResource: "\(modelType)_tokenizer_config", withExtension: "json") else {
        return nil
    }

    do {
        let data = try Data(contentsOf: url)
        let parsed = try JSONSerialization.jsonObject(with: data, options: [])
        guard let dictionary = parsed as? [NSString: Any] else {
            throw Hub.HubClientError.parseError("Failed to parse fallback tokenizer config")
        }
        return Config(dictionary)
    } catch {
        // One handler is enough: `HubClientError` and Foundation errors were
        // logged identically by the two former catch clauses.
        print("Error loading fallback tokenizer config: \(error.localizedDescription)")
        return nil
    }
}

Sources/Models/LanguageModel.swift

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,13 @@ extension LanguageModel: TextGenerationModel {
214214
}
215215
}
216216

217-
public enum TokenizerError: Error {
217+
/// Error reported when a tokenizer configuration cannot be found.
public enum TokenizerError: LocalizedError {
    case tokenizerConfigNotFound

    /// Human-readable description of the error.
    public var errorDescription: String? {
        let text: String
        switch self {
        case .tokenizerConfigNotFound:
            text = String(localized: "Tokenizer configuration could not be found. The model may be missing required tokenizer files.", comment: "Error when tokenizer configuration is missing")
        }
        return text
    }
}

Sources/TensorUtils/Weights.swift

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,18 @@ import CoreML
33

44
public struct Weights {
55

6-
enum WeightsError: Error {
6+
/// Errors describing unsupported or invalid weights files.
enum WeightsError: LocalizedError {
    case notSupported(message: String)
    case invalidFile

    /// Human-readable description of the error.
    public var errorDescription: String? {
        let text: String
        switch self {
        case .invalidFile:
            text = String(localized: "The weights file is invalid or corrupted.", comment: "Error when weight file is invalid")
        case .notSupported(let message):
            text = String(localized: "The weight format '\(message)' is not supported by this application.", comment: "Error when weight format is not supported")
        }
        return text
    }
}
1019

1120
private let dictionary: [String: MLMultiArray]

Sources/Tokenizers/Tokenizer.swift

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import Jinja
1212
public typealias Message = [String: Any]
1313
public typealias ToolSpec = [String: Any]
1414

15-
public enum TokenizerError: Error {
15+
public enum TokenizerError: LocalizedError {
1616
case missingConfig
1717
case missingTokenizerClassInConfig
1818
case unsupportedTokenizer(String)
@@ -21,6 +21,27 @@ public enum TokenizerError: Error {
2121
case chatTemplate(String)
2222
case tooLong(String)
2323
case mismatchedConfig(String)
24+
25+
public var errorDescription: String? {
26+
switch self {
27+
case .missingConfig:
28+
return String(localized: "Tokenizer configuration is missing.", comment: "Error when tokenizer config cannot be found")
29+
case .missingTokenizerClassInConfig:
30+
return String(localized: "The tokenizer class is not specified in the configuration.", comment: "Error when tokenizer_class is missing in config")
31+
case .unsupportedTokenizer(let name):
32+
return String(localized: "The tokenizer type '\(name)' is not supported.", comment: "Error when tokenizer type is not supported")
33+
case .missingVocab:
34+
return String(localized: "Vocabulary file is missing from the tokenizer configuration.", comment: "Error when vocab file is missing")
35+
case .malformedVocab:
36+
return String(localized: "The vocabulary file is malformed or corrupted.", comment: "Error when vocab file is malformed")
37+
case .chatTemplate(let message):
38+
return String(localized: "Chat template error: \(message)", comment: "Error with chat template")
39+
case .tooLong(let message):
40+
return String(localized: "Input is too long: \(message)", comment: "Error when input exceeds maximum length")
41+
case .mismatchedConfig(let message):
42+
return String(localized: "Tokenizer configuration mismatch: \(message)", comment: "Error when tokenizer configuration is inconsistent")
43+
}
44+
}
2445
}
2546

2647
public protocol TokenizingModel {

Tests/HubTests/DownloaderTests.swift

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,18 @@ import Combine
1010
@testable import Hub
1111

1212
/// Errors that can occur during the download process
13-
enum DownloadError: Error {
13+
enum DownloadError: LocalizedError {
    case invalidDownloadLocation
    case unexpectedError

    /// Human-readable description of the error.
    var errorDescription: String? {
        let text: String
        switch self {
        case .unexpectedError:
            text = String(localized: "An unexpected error occurred during the download process.", comment: "Generic download error message")
        case .invalidDownloadLocation:
            text = String(localized: "The download location is invalid or inaccessible.", comment: "Error when download destination is invalid")
        }
        return text
    }
}
1726

1827
final class DownloaderTests: XCTestCase {

0 commit comments

Comments
 (0)