Skip to content

Commit 75d7250

Browse files
committed
Added new methods, fixed visionOS build
1 parent 316b293 commit 75d7250

File tree

4 files changed

+53
-15
lines changed

4 files changed

+53
-15
lines changed

Package.swift

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ let package = Package(
1010
.iOS(.v16),
1111
.tvOS(.v16),
1212
.watchOS(.v9),
13+
.visionOS(.v1),
1314
],
1415
products: [
1516
.executable(
@@ -34,13 +35,17 @@ let package = Package(
3435
.binaryTarget(
3536
name: "Sentencepiece",
3637
url:
37-
"https://github.com/jkrukowski/swift-sentencepiece/releases/download/0.0.4/sentencepiece.xcframework.zip",
38-
checksum: "9168a242ffc75cdecb3bd21f4671842bb48d583d1cbc83e5a255d3e348622680"
38+
"https://github.com/jkrukowski/swift-sentencepiece/releases/download/0.0.5/sentencepiece.xcframework.zip",
39+
checksum: "4b3b3fefc5ce55edd9fa8b0133b1027db19d3c2b27c63e993727aebe2b3545a8"
3940
),
4041
.target(
4142
name: "SentencepieceTokenizer",
4243
dependencies: [
4344
"Sentencepiece"
45+
],
46+
linkerSettings: [
47+
.linkedLibrary("c++"),
48+
.linkedLibrary("stdc++")
4449
]
4550
),
4651
.testTarget(
@@ -50,11 +55,6 @@ let package = Package(
5055
],
5156
resources: [
5257
.copy("Model")
53-
],
54-
linkerSettings: [
55-
.unsafeFlags([
56-
"-lc++"
57-
])
5858
]
5959
),
6060
]

Package@swift-5.9.swift

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// swift-tools-version: 6.0
1+
// swift-tools-version: 5.9
22
// The swift-tools-version declares the minimum version of Swift required to build this package.
33

44
import PackageDescription
@@ -10,6 +10,7 @@ let package = Package(
1010
.iOS(.v16),
1111
.tvOS(.v16),
1212
.watchOS(.v9),
13+
.visionOS(.v1),
1314
],
1415
products: [
1516
.executable(
@@ -34,13 +35,17 @@ let package = Package(
3435
.binaryTarget(
3536
name: "Sentencepiece",
3637
url:
37-
"https://github.com/jkrukowski/swift-sentencepiece/releases/download/0.0.4/sentencepiece.xcframework.zip",
38-
checksum: "9168a242ffc75cdecb3bd21f4671842bb48d583d1cbc83e5a255d3e348622680"
38+
"https://github.com/jkrukowski/swift-sentencepiece/releases/download/0.0.5/sentencepiece.xcframework.zip",
39+
checksum: "4b3b3fefc5ce55edd9fa8b0133b1027db19d3c2b27c63e993727aebe2b3545a8"
3940
),
4041
.target(
4142
name: "SentencepieceTokenizer",
4243
dependencies: [
4344
"Sentencepiece"
45+
],
46+
linkerSettings: [
47+
.linkedLibrary("c++"),
48+
.linkedLibrary("stdc++")
4449
]
4550
),
4651
.testTarget(
@@ -50,11 +55,6 @@ let package = Package(
5055
],
5156
resources: [
5257
.copy("Model")
53-
],
54-
linkerSettings: [
55-
.unsafeFlags([
56-
"-lc++"
57-
])
5858
]
5959
),
6060
]

Sources/SentencepieceTokenizer/SentencepieceTokenizer.swift

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,18 @@ public final class SentencepieceTokenizer {
5656
}
5757

5858
/// Converts a token id back into its piece string.
///
/// `tokenOffset` is subtracted from `id` before the lookup is delegated to
/// `spm_id_to_piece`. The returned C string is copied into a Swift `String`
/// and the C buffer is deallocated when the function exits.
///
/// - Parameter id: Token id; `tokenOffset` is subtracted before the lookup.
/// - Returns: The piece string built from the pointer returned by `spm_id_to_piece`.
/// - Throws: `Error.failedToProcess` when `spm_id_to_piece` returns `nil`.
/// - Precondition: `id - tokenOffset >= 0`. A smaller id traps rather than throwing.
// NOTE(review): `Int32(id - tokenOffset)` will also trap if the value does not fit in Int32 — confirm callers never pass such ids.
public func idToToken(_ id: Int) throws -> String {
59+
precondition(id - tokenOffset >= 0)
5960
guard let tokenPtr = spm_id_to_piece(processor, Int32(id - tokenOffset)) else {
6061
throw Error.failedToProcess
6162
}
6263
defer { tokenPtr.deallocate() }
6364
return String(cString: tokenPtr)
6465
}
6566

67+
/// Converts a piece string into its token id.
///
/// The lookup is delegated to `spm_piece_to_id`; `tokenOffset` is added to
/// the value it returns.
///
/// - Parameter token: Piece string to look up.
/// - Returns: The value from `spm_piece_to_id` plus `tokenOffset`.
// NOTE(review): the result for a piece that is not in the vocabulary depends on `spm_piece_to_id` — not visible here, confirm.
public func tokenToId(_ token: String) -> Int {
68+
Int(spm_piece_to_id(processor, token)) + tokenOffset
69+
}
70+
6671
public func encode(_ text: String) throws -> [Int] {
6772
var size: Int32 = 0
6873
guard let encodedPtr = spm_encode(processor, text, &size) else {
@@ -73,6 +78,10 @@ public final class SentencepieceTokenizer {
7378
return result.map { Int($0) + tokenOffset }
7479
}
7580

81+
/// Passes an extra-options string for encoding to the underlying processor.
///
/// The string is forwarded unchanged to `spm_set_encode_extra_options`.
///
/// - Parameter options: Options string, e.g. `"reverse:bos:eos"`.
// NOTE(review): no error is surfaced to the caller here; how an unrecognised option is handled by the C call is not visible — confirm.
public func setEncodeExtraOptions(_ options: String) {
82+
spm_set_encode_extra_options(processor, options)
83+
}
84+
7685
public func decode(_ ids: [Int]) throws -> String {
7786
let encoded = ids.map { Int32($0 - tokenOffset) }
7887
guard let decodedPtr = spm_decode(processor, encoded, Int32(encoded.count)) else {
@@ -81,4 +90,8 @@ public final class SentencepieceTokenizer {
8190
defer { decodedPtr.deallocate() }
8291
return String(cString: decodedPtr)
8392
}
93+
94+
/// Passes an extra-options string for decoding to the underlying processor.
///
/// The string is forwarded unchanged to `spm_set_decode_extra_options`.
///
/// - Parameter options: Options string, e.g. `"reverse:bos:eos"`.
// NOTE(review): no error is surfaced to the caller here; how an unrecognised option is handled by the C call is not visible — confirm.
public func setDecodeExtraOptions(_ options: String) {
95+
spm_set_decode_extra_options(processor, options)
96+
}
8497
}

Tests/SentencepieceTokenizerTests/SentencepieceTokenizerTests.swift

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@ final class SentencepieceTokenizerTests: XCTestCase {
2626
XCTAssertEqual(try tokenizer.idToToken(8999), "▁world")
2727
XCTAssertEqual(try tokenizer.idToToken(38), "!")
2828

29+
XCTAssertEqual(tokenizer.tokenToId("▁Hello"), 35378)
30+
XCTAssertEqual(tokenizer.tokenToId(","), 4)
31+
XCTAssertEqual(tokenizer.tokenToId("▁world"), 8999)
32+
XCTAssertEqual(tokenizer.tokenToId("!"), 38)
33+
2934
XCTAssertEqual(try tokenizer.decode([]), "")
3035
XCTAssertEqual(try tokenizer.encode(""), [])
3136

@@ -34,4 +39,24 @@ final class SentencepieceTokenizerTests: XCTestCase {
3439
XCTAssertEqual(tokenizer.bosTokenId, 2)
3540
XCTAssertEqual(tokenizer.eosTokenId, 3)
3641
}
42+
43+
// Checks that setting the encode extra options "reverse:bos:eos" changes the
// ids returned by `encode` for "Hello, world!" to the expected sequence below.
func testSentencepieceEncodeExtraOptions() throws {
44+
let modelPath = try XCTUnwrap(
45+
Bundle.module.path(
46+
forResource: "sentencepiece.bpe", ofType: "model", inDirectory: "Model"))
47+
let tokenizer = try SentencepieceTokenizer(modelPath: modelPath)
48+
tokenizer.setEncodeExtraOptions("reverse:bos:eos")
49+
let output = try tokenizer.encode("Hello, world!")
50+
XCTAssertEqual(output, [2, 38, 8999, 4, 35378, 3])
51+
}
52+
53+
// Checks that setting the decode extra options "reverse:bos:eos" makes
// `decode` return "! world, Hello" for the id sequence below.
func testSentencepieceDecodeExtraOptions() throws {
54+
let modelPath = try XCTUnwrap(
55+
Bundle.module.path(
56+
forResource: "sentencepiece.bpe", ofType: "model", inDirectory: "Model"))
57+
let tokenizer = try SentencepieceTokenizer(modelPath: modelPath)
58+
tokenizer.setDecodeExtraOptions("reverse:bos:eos")
59+
let output = try tokenizer.decode([2, 35378, 4, 8999, 38, 3])
60+
XCTAssertEqual(output, "! world, Hello")
61+
}
3762
}

0 commit comments

Comments
 (0)