Skip to content

Improve error handling and add error messages #186

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 129 additions & 38 deletions Sources/Hub/Hub.swift
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//
// Hub.swift
//
//
//
// Created by Pedro Cuenca on 18/5/23.
//
Expand All @@ -10,23 +10,57 @@ import Foundation
public struct Hub {}

public extension Hub {
/// Errors reported by the Hub client.
///
/// Conforms to `LocalizedError` so that `localizedDescription` yields the
/// human-readable messages built in `errorDescription`.
enum HubClientError: LocalizedError {
    /// A valid Hugging Face token is needed to complete the request.
    case authorizationRequired
    /// An HTTP error identified by its status code.
    case httpStatusCode(Int)
    /// The server response could not be parsed.
    case parse
    /// A failure that does not fit any other case.
    case unexpectedError
    /// A download failed; the payload is a descriptive message.
    case downloadError(String)
    /// The named file could not be found.
    case fileNotFound(String)
    /// A networking failure, wrapping the underlying `URLError`.
    case networkError(URLError)
    /// The named resource could not be found.
    case resourceNotFound(String)
    /// A required configuration file (named by the payload) is missing.
    case configurationMissing(String)
    /// A file-system failure, wrapping the underlying error.
    case fileSystemError(Error)
    /// Parsing failed; the payload is a descriptive message.
    case parseError(String)

    /// A localized, user-facing description of the error.
    public var errorDescription: String? {
        switch self {
        case .authorizationRequired:
            return String(localized: "Authentication required. Please provide a valid Hugging Face token.")
        case .httpStatusCode(let code):
            return String(localized: "HTTP error with status code: \(code)")
        case .parse:
            return String(localized: "Failed to parse server response.")
        case .unexpectedError:
            return String(localized: "An unexpected error occurred.")
        case .downloadError(let message):
            return String(localized: "Download failed: \(message)")
        case .fileNotFound(let filename):
            return String(localized: "File not found: \(filename)")
        case .networkError(let error):
            return String(localized: "Network error: \(error.localizedDescription)")
        case .resourceNotFound(let resource):
            return String(localized: "Resource not found: \(resource)")
        case .configurationMissing(let file):
            return String(localized: "Required configuration file missing: \(file)")
        case .fileSystemError(let error):
            return String(localized: "File system error: \(error.localizedDescription)")
        case .parseError(let message):
            return String(localized: "Parse error: \(message)")
        }
    }
}

/// The kinds of repository the Hub distinguishes between.
///
/// Each case's raw value is the case name itself (`"models"`, `"datasets"`, `"spaces"`).
enum RepoType: String {
    case models, datasets, spaces
}

struct Repo {
public let id: String
public let type: RepoType

public init(id: String, type: RepoType = .models) {
self.id = id
self.type = type
Expand All @@ -51,11 +85,11 @@ public struct Config {
.map { $0.offset == 0 ? $0.element.lowercased() : $0.element.capitalized }
.joined()
}

func uncamelCase(_ string: String) -> String {
let scalars = string.unicodeScalars
var result = ""

var previousCharacterIsLowercase = false
for scalar in scalars {
if CharacterSet.uppercaseLetters.contains(scalar) {
Expand All @@ -70,7 +104,7 @@ public struct Config {
previousCharacterIsLowercase = true
}
}

return result
}

Expand All @@ -88,17 +122,17 @@ public struct Config {
/// The untyped entry stored under the `"value"` key of `dictionary`.
public var value: Any? {
    dictionary["value"]
}

/// `value` as an `Int`, or `nil` when it is not one.
public var intValue: Int? {
    value as? Int
}

/// `value` as a `Bool`, or `nil` when it is not one.
public var boolValue: Bool? {
    value as? Bool
}

/// `value` as a `String`, or `nil` when it is not one.
public var stringValue: String? {
    value as? String
}

// Instead of doing this we could provide custom classes and decode to them
/// `value` as an array of `Config`, one per dictionary element.
///
/// Returns `nil` when `value` is not an array, or when any element is not a
/// `[NSString: Any]` dictionary (previously such an element caused a crash
/// through a forced cast).
public var arrayValue: [Config]? {
    guard let list = value as? [Any] else { return nil }

    var configs: [Config] = []
    configs.reserveCapacity(list.count)
    for element in list {
        // A non-dictionary element means this is not an array of configs.
        guard let dictionary = element as? [NSString: Any] else { return nil }
        configs.append(Config(dictionary))
    }
    return configs
}

/// Tuple of token identifier and string value
///
/// `nil` when `value` is not a `(UInt, String)` tuple.
public var tokenValue: (UInt, String)? {
    value as? (UInt, String)
}
}
Expand All @@ -120,7 +154,7 @@ public class LanguageModelConfigurationFromHub {
return try await self.loadConfig(modelName: modelName, hubApi: hubApi)
}
}

public init(
modelFolder: URL,
hubApi: HubApi = .shared
Expand Down Expand Up @@ -179,47 +213,104 @@ public class LanguageModelConfigurationFromHub {
) async throws -> Configurations {
let filesToDownload = ["config.json", "tokenizer_config.json", "chat_template.json", "tokenizer.json"]
let repo = Hub.Repo(id: modelName)
let downloadedModelFolder = try await hubApi.snapshot(from: repo, matching: filesToDownload)

return try await loadConfig(modelFolder: downloadedModelFolder, hubApi: hubApi)
do {
let downloadedModelFolder = try await hubApi.snapshot(from: repo, matching: filesToDownload)
return try await loadConfig(modelFolder: downloadedModelFolder, hubApi: hubApi)
} catch {
// Convert generic errors to more specific ones
if let urlError = error as? URLError {
switch urlError.code {
case .notConnectedToInternet, .networkConnectionLost:
throw Hub.HubClientError.networkError(urlError)
case .resourceUnavailable:
throw Hub.HubClientError.resourceNotFound(modelName)
default:
throw Hub.HubClientError.networkError(urlError)
}
} else {
throw error
}
}
}

/// Loads the model and tokenizer configuration files found in `modelFolder`.
///
/// `config.json` and `tokenizer.json` are required. `tokenizer_config.json` and
/// `chat_template.json` are optional; when `chat_template.json` provides a string
/// `chat_template`, it is merged into (or becomes) the tokenizer configuration.
///
/// - Parameters:
///   - modelFolder: Local directory that contains the configuration files.
///   - hubApi: Client used to read each JSON file.
/// - Returns: The loaded `Configurations`.
/// - Throws: `Hub.HubClientError.configurationMissing` when a required file is absent;
///   `Hub.HubClientError.parseError` when Foundation reports malformed JSON;
///   any other `Hub.HubClientError` unchanged; and
///   `Hub.HubClientError.fileSystemError` wrapping every remaining failure.
func loadConfig(
    modelFolder: URL,
    hubApi: HubApi = .shared
) async throws -> Configurations {
    do {
        // Load required configurations
        let modelConfigURL = modelFolder.appending(path: "config.json")
        guard FileManager.default.fileExists(atPath: modelConfigURL.path) else {
            throw Hub.HubClientError.configurationMissing("config.json")
        }
        let modelConfig = try hubApi.configuration(fileURL: modelConfigURL)

        let tokenizerDataURL = modelFolder.appending(path: "tokenizer.json")
        guard FileManager.default.fileExists(atPath: tokenizerDataURL.path) else {
            throw Hub.HubClientError.configurationMissing("tokenizer.json")
        }
        let tokenizerData = try hubApi.configuration(fileURL: tokenizerDataURL)

        // Load tokenizer config (optional)
        var tokenizerConfig: Config? = nil
        let tokenizerConfigURL = modelFolder.appending(path: "tokenizer_config.json")
        if FileManager.default.fileExists(atPath: tokenizerConfigURL.path) {
            tokenizerConfig = try hubApi.configuration(fileURL: tokenizerConfigURL)
        }

        // Check for chat template and merge if available. Failures here are
        // deliberately ignored (`try?`): the chat template is best-effort.
        let chatTemplateURL = modelFolder.appending(path: "chat_template.json")
        if FileManager.default.fileExists(atPath: chatTemplateURL.path),
           let chatTemplateConfig = try? hubApi.configuration(fileURL: chatTemplateURL),
           let chatTemplate = chatTemplateConfig.chatTemplate?.stringValue {
            // The value of chat_template could also be an array of strings, but we're not
            // handling that case here, since it's discouraged.
            // Create or update tokenizer config with chat template
            if var configDict = tokenizerConfig?.dictionary {
                configDict["chat_template"] = chatTemplate
                tokenizerConfig = Config(configDict)
            } else {
                tokenizerConfig = Config(["chat_template": chatTemplate])
            }
        }

        return Configurations(
            modelConfig: modelConfig,
            tokenizerConfig: tokenizerConfig,
            tokenizerData: tokenizerData
        )
    } catch let error as Hub.HubClientError {
        // Already specific; pass through untouched.
        throw error
    } catch {
        let nsError = error as NSError
        // JSONSerialization reports malformed input in NSCocoaErrorDomain with the
        // "property list read corrupt" code (3840), not in a dedicated JSON domain.
        // The legacy domain string is still accepted for backward compatibility.
        let isMalformedJSON = nsError.domain == NSCocoaErrorDomain
            && nsError.code == CocoaError.Code.propertyListReadCorrupt.rawValue
        if isMalformedJSON || nsError.domain == "NSJSONSerialization" {
            throw Hub.HubClientError.parseError("Invalid JSON format: \(nsError.localizedDescription)")
        }
        throw Hub.HubClientError.fileSystemError(error)
    }
}

/// Loads the tokenizer configuration bundled with the package for `modelType`, if any.
///
/// - Parameter modelType: Used to build the resource name `"<modelType>_tokenizer_config"`
///   (extension `json`) looked up in `Bundle.module`.
/// - Returns: The parsed `Config`, or `nil` when the resource is missing, cannot be read,
///   is not valid JSON, or its top level is not a dictionary.
static func fallbackTokenizerConfig(for modelType: String) -> Config? {
    guard let url = Bundle.module.url(forResource: "\(modelType)_tokenizer_config", withExtension: "json") else {
        return nil
    }

    do {
        let data = try Data(contentsOf: url)
        let parsed = try JSONSerialization.jsonObject(with: data, options: [])
        guard let dictionary = parsed as? [NSString: Any] else {
            throw Hub.HubClientError.parseError("Failed to parse fallback tokenizer config")
        }
        return Config(dictionary)
    } catch {
        // The fallback is best-effort: report the failure and let the caller continue
        // without it. A single catch covers both HubClientError and other errors, since
        // both were handled identically.
        print("Error loading fallback tokenizer config: \(error.localizedDescription)")
        return nil
    }
}
Expand Down
Loading