Commit 0add1a4

Make BertTokenizer public (#73)
1 parent 3bd0226 commit 0add1a4

1 file changed: +13, -13 lines

Sources/Tokenizers/BertTokenizer.swift

Lines changed: 13 additions & 13 deletions
@@ -9,7 +9,7 @@
 import Foundation
 import Hub

-class BertTokenizer {
+public class BertTokenizer {
     private let basicTokenizer = BasicTokenizer()
     private let wordpieceTokenizer: WordpieceTokenizer
     private let maxLen = 512
@@ -18,12 +18,12 @@ class BertTokenizer {
     private let vocab: [String: Int]
     private let ids_to_tokens: [Int: String]

-    var bosToken: String?
-    var bosTokenId: Int?
-    var eosToken: String?
-    var eosTokenId: Int?
+    public var bosToken: String?
+    public var bosTokenId: Int?
+    public var eosToken: String?
+    public var eosTokenId: Int?

-    init(vocab: [String: Int],
+    public init(vocab: [String: Int],
          merges: [String]?,
          tokenizeChineseChars: Bool = true,
          bosToken: String? = nil,
@@ -39,7 +39,7 @@ class BertTokenizer {
         self.eosTokenId = eosToken == nil ? nil : vocab[eosToken!]
     }

-    required convenience init(tokenizerConfig: Config, tokenizerData: Config, addedTokens: [String : Int]) throws {
+    public required convenience init(tokenizerConfig: Config, tokenizerData: Config, addedTokens: [String : Int]) throws {
         guard let vocab = tokenizerData.model?.vocab?.dictionary as? [String: Int] else {
             throw TokenizerError.missingVocab
         }
@@ -51,7 +51,7 @@ class BertTokenizer {
     }


-    func tokenize(text: String) -> [String] {
+    public func tokenize(text: String) -> [String] {
         let text = tokenizeChineseCharsIfNeed(text)
         var tokens: [String] = []
         for token in basicTokenizer.tokenize(text: text) {
@@ -128,21 +128,21 @@ class BertTokenizer {


 extension BertTokenizer: PreTrainedTokenizerModel {
-    var unknownToken: String? { wordpieceTokenizer.unkToken }
-    var unknownTokenId: Int? { vocab[unknownToken!] }
-
+    public var unknownToken: String? { wordpieceTokenizer.unkToken }
+    public var unknownTokenId: Int? { vocab[unknownToken!] }
+
     func encode(text: String) -> [Int] { tokenizeToIds(text: text) }

     func decode(tokens: [Int]) -> String {
         let tokens = unTokenize(tokens: tokens)
         return convertWordpieceToBasicTokenList(tokens)
     }

-    func convertTokenToId(_ token: String) -> Int? {
+    public func convertTokenToId(_ token: String) -> Int? {
         return vocab[token] ?? unknownTokenId
     }

-    func convertIdToToken(_ id: Int) -> String? {
+    public func convertIdToToken(_ id: Int) -> String? {
         return ids_to_tokens[id]
     }
 }
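
With the class, its initializer, tokenize, and the token/ID conversion helpers now public, code outside the Tokenizers module can construct and use BertTokenizer directly. The following is a minimal sketch of what this commit enables, not code from the repository: the toy vocabulary is hypothetical, and it assumes the initializer parameters cut off in the diff after bosToken also have default values.

import Tokenizers

// Hypothetical toy vocabulary; a real one would come from a BERT vocab file.
let vocab: [String: Int] = [
    "[UNK]": 0, "[CLS]": 1, "[SEP]": 2,
    "hello": 3, "world": 4, "##s": 5,
]

// Now-public initializer from the diff; parameters after `bosToken` are
// truncated there, so this assumes they are defaulted as well.
let tokenizer = BertTokenizer(vocab: vocab, merges: nil)

// Now-public tokenization and conversion APIs.
let tokens = tokenizer.tokenize(text: "hello worlds")   // e.g. ["hello", "world", "##s"]
let ids = tokens.compactMap { tokenizer.convertTokenToId($0) }
let back = ids.compactMap { tokenizer.convertIdToToken($0) }
print(tokens, ids, back)

Note that encode and decode in the PreTrainedTokenizerModel extension remain internal in this commit, so external callers would go through tokenize, convertTokenToId, and convertIdToToken as shown.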
