Skip to content

Commit fc65432

Browse files
Handle NSNumber score values (#94)
1 parent 0a606f5 commit fc65432

File tree

1 file changed

+15
-2
lines changed

1 file changed

+15
-2
lines changed

Sources/Tokenizers/UnigramTokenizer.swift

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
// Copyright © 2024 Hugging Face. All rights reserved.
77
//
88

9+
import Foundation
910
import Hub
1011

1112
class UnigramTokenizer: PreTrainedTokenizerModel {
@@ -37,8 +38,20 @@ class UnigramTokenizer: PreTrainedTokenizerModel {
3738
}
3839

3940
vocab = try configVocab.map { piece in
40-
guard let token = piece.first as? String else { throw TokenizerError.malformedVocab }
41-
guard let score = piece.last as? Float else { throw TokenizerError.malformedVocab }
41+
guard let token = piece.first as? String,
42+
let scoreValue = piece.last else {
43+
throw TokenizerError.malformedVocab
44+
}
45+
46+
let score: Float
47+
if let floatScore = scoreValue as? Float {
48+
score = floatScore
49+
} else if let numberScore = scoreValue as? NSNumber {
50+
score = numberScore.floatValue
51+
} else {
52+
throw TokenizerError.malformedVocab
53+
}
54+
4255
return SentencePieceToken(token: token, score: score)
4356
}
4457

0 commit comments

Comments
 (0)