We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 851a559 commit ec07a42 Copy full SHA for ec07a42
crates/bpe-openai/src/lib.rs
@@ -42,6 +42,12 @@ static BPE_O200K: LazyLock<Tokenizer> = LazyLock::new(|| {
42
43
pub use bpe::*;
44
45
+/// A byte-pair encoding tokenizer that supports a pre-tokenization regex.
46
+/// The direct methods on this type pre-tokenize the input text and should
47
+/// produce the same output as the tiktoken tokenizers. The type gives access
48
+/// to the regex and underlying byte-pair encoding if needed. Note that using
49
+/// the byte-pair encoding directly does not take the regex into account and
50
+/// may result in output that differs from tiktoken.
51
pub struct Tokenizer {
52
/// The byte-pair encoding for this tokenizer.
53
pub bpe: BytePairEncoding,
0 commit comments