From c78ec28af178478902ba696a6f5cb4419513386b Mon Sep 17 00:00:00 2001
From: shavit
Date: Tue, 25 Feb 2025 09:30:29 -0500
Subject: [PATCH 1/2] Match token value with array and tuple

* Add match cases for token value
* Add BartTokenizer to known tokenizers
---
NOTE(review): upstream BART checkpoints normally ship a byte-level BPE
vocabulary; please confirm that routing "BartTokenizer" to BertTokenizer
is intended for the models this change targets.

 Sources/Hub/Hub.swift              | 9 ++++++++-
 Sources/Tokenizers/Tokenizer.swift | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/Sources/Hub/Hub.swift b/Sources/Hub/Hub.swift
index 4116dcb..76f74d2 100644
--- a/Sources/Hub/Hub.swift
+++ b/Sources/Hub/Hub.swift
@@ -100,7 +100,14 @@ public struct Config {
     }
 
     /// Tuple of token identifier and string value
-    public var tokenValue: (UInt, String)? { value as? (UInt, String) }
+    public var tokenValue: (UInt, String)? {
+        switch value {
+        case let (i, t) as (UInt, String): return (i, t)
+        case let (t, i) as (String, UInt): return (i, t)
+        case let a as [Any] where a.count == 2: return (a[1], a[0]) as? (UInt, String)
+        default: return nil
+        }
+    }
 }
 
 public class LanguageModelConfigurationFromHub {
diff --git a/Sources/Tokenizers/Tokenizer.swift b/Sources/Tokenizers/Tokenizer.swift
index db53337..210c543 100644
--- a/Sources/Tokenizers/Tokenizer.swift
+++ b/Sources/Tokenizers/Tokenizer.swift
@@ -77,6 +77,7 @@ public protocol PreTrainedTokenizerModel: TokenizingModel {
 
 struct TokenizerModel {
     static let knownTokenizers: [String : PreTrainedTokenizerModel.Type] = [
+        "BartTokenizer"      : BertTokenizer.self,
         "BertTokenizer"      : BertTokenizer.self,
         "DistilbertTokenizer": BertTokenizer.self,
         "DistilBertTokenizer": BertTokenizer.self,

From fcbb09e4323d26516367865a1d8250bd3f8dd651 Mon Sep 17 00:00:00 2001
From: shavit
Date: Thu, 27 Feb 2025 14:40:15 -0500
Subject: [PATCH 2/2] Test reading token value in different order

---
NOTE(review): revised during review. The tokenValue doc comment now states
the accepted array shape, and the new test uses XCTUnwrap instead of
force-unwraps so a malformed fixture is reported as a test failure rather
than crashing the test run. The commit and index hashes in this patch
predate the revision; regenerate with git format-patch before sending.

 Sources/Hub/Hub.swift         |  7 +++++--
 Tests/HubTests/HubTests.swift | 15 +++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/Sources/Hub/Hub.swift b/Sources/Hub/Hub.swift
index 76f74d2..fad2875 100644
--- a/Sources/Hub/Hub.swift
+++ b/Sources/Hub/Hub.swift
@@ -100,11 +100,14 @@ public struct Config {
     }
 
     /// Tuple of token identifier and string value
+    ///
+    /// The value must be a two-element array holding the identifier and the
+    /// string in either order; any other shape yields `nil`.
     public var tokenValue: (UInt, String)? {
-        switch value {
+        guard let pair = value as? [Any], pair.count == 2 else { return nil }
+        switch (pair[0], pair[1]) {
         case let (i, t) as (UInt, String): return (i, t)
         case let (t, i) as (String, UInt): return (i, t)
-        case let a as [Any] where a.count == 2: return (a[1], a[0]) as? (UInt, String)
         default: return nil
         }
     }
diff --git a/Tests/HubTests/HubTests.swift b/Tests/HubTests/HubTests.swift
index 1d7bc86..a7f6203 100644
--- a/Tests/HubTests/HubTests.swift
+++ b/Tests/HubTests/HubTests.swift
@@ -118,4 +118,19 @@ class HubTests: XCTestCase {
         let vocab_dict = config.dictionary["vocab"] as! [String: Int]
         XCTAssertNotEqual(vocab_dict.count, 2)
     }
+
+    func testConfigTokenValueDifferentOrder() throws {
+        let data = Data("{\"sep\": [\"\", 2], \"cls\": [0, \"\"]}".utf8)
+        let json = try JSONSerialization.jsonObject(with: data, options: [])
+        let dict = try XCTUnwrap(json as? [NSString: Any])
+        let config = Config(dict)
+
+        let sep = try XCTUnwrap(config.sep?.tokenValue)
+        XCTAssertEqual(sep.0, 2)
+        XCTAssertEqual(sep.1, "")
+
+        let cls = try XCTUnwrap(config.cls?.tokenValue)
+        XCTAssertEqual(cls.0, 0)
+        XCTAssertEqual(cls.1, "")
+    }
 }