Skip to content

Commit a6637e0

Browse files
authored
Remove comments
1 parent 4631505 commit a6637e0

File tree

3 files changed

+0
-14
lines changed

3 files changed

+0
-14
lines changed

Sources/Tokenizers/BPETokenizer.swift

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,6 @@ class BPETokenizer: PreTrainedTokenizerModel {
163163
let bpeTokens = self.bpe(token: text).split(separator: " ").map { String($0) }
164164
for token in bpeTokens {
165165
if convertTokenToId(token) != unknownTokenId {
166-
// if let _ = tokensToIds[token as NSString] {
167166
tokens.append(token)
168167
} else {
169168
// TODO: if config.byte_fallback is False, append the unknown token instead

Sources/Tokenizers/Tokenizer.swift

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -183,15 +183,6 @@ public class PreTrainedTokenizer: Tokenizer {
183183
}.joined(separator: "|")
184184
addedTokensRegex = try? NSRegularExpression(pattern: addedTokensRegexString, options: [])
185185

186-
// let addedTokensRegexString = (tokenizerData.addedTokens?.arrayValue ?? []).compactMap { addedToken in
187-
// guard let content = addedToken.content?.stringValue else { return nil }
188-
// let prefix = (addedToken.lstrip?.boolValue ?? false ? #"\s*"# : "")
189-
// let suffix = (addedToken.rstrip?.boolValue ?? false ? #"\s*"# : "")
190-
// let token = NSRegularExpression.escapedPattern(for: content)
191-
// return "\(prefix)(\(token))\(suffix)"
192-
// }.joined(separator: "|")
193-
// addedTokensRegex = try? NSRegularExpression(pattern: addedTokensRegexString, options: [])
194-
195186
// TODO: specialTokens are stored but never used
196187
self.specialTokens = specialTokens
197188
self.addedTokens = Set(addedTokens.keys)

Tests/TokenizersTests/TokenizerTests.swift

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,6 @@ class GemmaTokenizerTests: TokenizerTests {
7070
let cases = ["à" /* 0x61 0x300 */, "à" /* 0xe0 */]
7171
let expected = [217138, 1305]
7272

73-
// for x in cases.map { $0.unicodeScalars.map { String(format:"0x%lX", $0.value) } } {
74-
// print(x)
75-
// }
76-
7773
// These are different characters
7874
for (s, expected) in zip(cases, expected) {
7975
let encoded = await tester.tokenizer?.encode(text: " " + s)

0 commit comments

Comments (0)