Skip to content

Commit 5d48793

Browse files
committed
Switch test to microsoft tokenizer, verify in Python
1 parent b610c2d commit 5d48793

File tree

1 file changed: 3 additions (+3) and 3 deletions (-3).

Tests/TokenizersTests/AddedTokensTests.swift

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,12 +11,12 @@ import Hub
 
 class AddedTokensTests: XCTestCase {
     func testPhiAddedTokens() async throws {
-        let tokenizer = try await AutoTokenizer.from(pretrained: "mlx-community/Phi-3-mini-128k-instruct-4bit")
+        let tokenizer = try await AutoTokenizer.from(pretrained: "microsoft/Phi-3-mini-128k-instruct")
         let inputIds = tokenizer("This is the <|end|>. My only friend, the <|end|>")
-        XCTAssertEqual(inputIds, [1, 910, 338, 278, 29871, 32007, 29889, 1619, 871, 5121, 29892, 278, 29871, 32007])
+        XCTAssertEqual(inputIds, [910, 338, 278, 29871, 32007, 29889, 1619, 871, 5121, 29892, 278, 29871, 32007])
 
         let decoded = tokenizer.decode(tokens: inputIds)
-        XCTAssertEqual(decoded, "<s> This is the <|end|>. My only friend, the <|end|>")
+        XCTAssertEqual(decoded, "This is the <|end|>. My only friend, the <|end|>")
     }
 
     func testGemmaAddedTokens() async throws {

0 commit comments

Comments
 (0)