Skip to content

Commit b610c2d

Browse files
committed
Use NSString in UnigramTokenizer
1 parent f2044bd commit b610c2d

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

Sources/Tokenizers/UnigramTokenizer.swift

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -23,8 +23,8 @@ class UnigramTokenizer: PreTrainedTokenizerModel {
2323
public var unknownToken: String? { unknownPiece.token }
2424

2525
let minScore: Float
26-
let tokensToIds: [String: Int]
27-
26+
let tokensToIds: [NSString: Int]
27+
2828
let bosToken: String? = " "
2929
let bosTokenId: Int?
3030
let eosToken: String?
@@ -63,20 +63,20 @@ class UnigramTokenizer: PreTrainedTokenizerModel {
6363
self.unknownTokenId = unknownTokenId
6464
self.unknownPiece = SentencePieceToken(token: vocab[unknownTokenId].token, score: minScore - 10)
6565

66-
tokensToIds = Dictionary(uniqueKeysWithValues: vocab.map { $0.token }.enumerated().map { ($1, $0) })
67-
bosTokenId = tokensToIds[bosToken!] // May be nil
68-
66+
tokensToIds = Dictionary(uniqueKeysWithValues: vocab.map { $0.token as NSString }.enumerated().map { ($1, $0) })
67+
bosTokenId = tokensToIds[bosToken! as NSString] // May be nil
68+
6969
eosToken = tokenizerConfig.eosToken?.stringValue
70-
eosTokenId = eosToken == nil ? nil : tokensToIds[eosToken!]
71-
70+
eosTokenId = eosToken == nil ? nil : tokensToIds[eosToken! as NSString]
71+
7272
trie = Trie()
7373
trie.append(contentsOf: vocab.map { $0.token })
7474

7575
// TODO: set fuse_unk to true
7676
}
7777

7878
func convertTokenToId(_ token: String) -> Int? {
79-
return tokensToIds[token] ?? self.unknownTokenId
79+
return tokensToIds[token as NSString] ?? self.unknownTokenId
8080
}
8181

8282
func convertIdToToken(_ id: Int) -> String? {
@@ -95,7 +95,7 @@ class UnigramTokenizer: PreTrainedTokenizerModel {
9595

9696
let beginIndex = sentence.index(sentence.startIndex, offsetBy: beginPos)
9797
for token in trie.commonPrefixSearchIterator(sentence[beginIndex...]).map({ String($0) }) {
98-
guard let tokenId = tokensToIds[token] else { fatalError("Token not in vocab: \(token)") }
98+
guard let tokenId = tokensToIds[token as NSString] else { fatalError("Token not in vocab: \(token)") }
9999
let tokenScore = vocab[tokenId].score
100100
lattice.insert(startOffset: beginPos, length: token.count, score: tokenScore, tokenId: tokenId)
101101
if !hasSingleNode && token.count == mblen {

0 commit comments

Comments (0)