
Commit 599e11d

lint
Parent: 7f627d5

2 files changed: +14 -13


crates/bpe-openai/src/lib.rs

Lines changed: 10 additions & 9 deletions
@@ -6,28 +6,28 @@ use fancy_regex::Regex;
 
 static BPE_R50K: LazyLock<Tokenizer> = LazyLock::new(|| {
     let bytes = include_bytes!(concat!(env!("OUT_DIR"), "/bpe_r50k.dict"));
-    let bpe = rmp_serde::from_slice(bytes).expect("");
+    let bpe = rmp_serde::from_slice(bytes).expect("valid bpe data");
     let pat = "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+";
-    Tokenizer::new(bpe, Some(pat)).unwrap()
+    Tokenizer::new(bpe, Some(pat)).expect("valid regex")
 });
 
 static BPE_P50K: LazyLock<Tokenizer> = LazyLock::new(|| {
     let bytes = include_bytes!(concat!(env!("OUT_DIR"), "/bpe_p50k.dict"));
-    let bpe = rmp_serde::from_slice(bytes).expect("");
+    let bpe = rmp_serde::from_slice(bytes).expect("valid bpe data");
     let pat = "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)|\\s+";
-    Tokenizer::new(bpe, Some(pat)).unwrap()
+    Tokenizer::new(bpe, Some(pat)).expect("valid regex")
 });
 
 static BPE_CL100K: LazyLock<Tokenizer> = LazyLock::new(|| {
     let bytes = include_bytes!(concat!(env!("OUT_DIR"), "/bpe_cl100k.dict"));
-    let bpe = rmp_serde::from_slice(bytes).expect("");
+    let bpe = rmp_serde::from_slice(bytes).expect("valid bpe data");
     let pat = "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+";
-    Tokenizer::new(bpe, Some(pat)).unwrap()
+    Tokenizer::new(bpe, Some(pat)).expect("valid regex")
 });
 
 static BPE_O200K: LazyLock<Tokenizer> = LazyLock::new(|| {
     let bytes = include_bytes!(concat!(env!("OUT_DIR"), "/bpe_o200k.dict"));
-    let bpe = rmp_serde::from_slice(bytes).expect("");
+    let bpe = rmp_serde::from_slice(bytes).expect("valid bpe data");
     let pat = [
         "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?",
         "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?",
@@ -37,7 +37,7 @@ static BPE_O200K: LazyLock<Tokenizer> = LazyLock::new(|| {
         "\\s+(?!\\S)",
         "\\s+",
     ].join("|");
-    Tokenizer::new(bpe, Some(&pat)).unwrap()
+    Tokenizer::new(bpe, Some(&pat)).expect("valid regex")
 });
 
 pub use bpe::*;
@@ -50,8 +50,9 @@ pub struct Tokenizer {
 }
 
 impl Tokenizer {
+    #[allow(clippy::result_large_err)]
     pub fn new(bpe: BytePairEncoding, pat: Option<&str>) -> fancy_regex::Result<Self> {
-        let pat = pat.map(|pat| fancy_regex::Regex::new(pat)).transpose()?;
+        let pat = pat.map(fancy_regex::Regex::new).transpose()?;
         Ok(Self { bpe, pat })
     }
 
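
Aside: the changes above follow two standard clippy-driven patterns. A minimal sketch, not from this repository and using only std (parse_opt is a hypothetical helper):

use std::str::FromStr;

// Before (clippy::redundant_closure): input.map(|s| i32::from_str(s)).transpose()
// After: pass the function path directly; the closure only forwarded its argument.
fn parse_opt(input: Option<&str>) -> Result<Option<i32>, std::num::ParseIntError> {
    input.map(i32::from_str).transpose()
}

fn main() {
    // expect() names the violated invariant in the panic message,
    // where a bare unwrap() would not say which assumption broke.
    let n = parse_opt(Some("42")).expect("valid integer literal");
    assert_eq!(n, Some(42));
}

The same shape appears in Tokenizer::new above: pat.map(fancy_regex::Regex::new) replaces the closure that merely forwarded its argument.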

crates/bpe/benchmarks/lib.rs

Lines changed: 4 additions & 4 deletions
@@ -19,14 +19,14 @@ pub static TOKENIZERS: LazyLock<
     (
         "cl100k",
         bpe_openai::cl100k(),
-        tiktoken_rs::cl100k_base().unwrap(),
-        HuggingfaceTokenizer::from_pretrained("Xenova/gpt-4", None).unwrap(),
+        tiktoken_rs::cl100k_base().expect("tokenizer available"),
+        HuggingfaceTokenizer::from_pretrained("Xenova/gpt-4", None).expect("model available"),
     ),
     (
         "o200k",
         bpe_openai::o200k(),
-        tiktoken_rs::o200k_base().unwrap(),
-        HuggingfaceTokenizer::from_pretrained("Xenova/gpt-4o", None).unwrap(),
+        tiktoken_rs::o200k_base().expect("tokenizer available"),
+        HuggingfaceTokenizer::from_pretrained("Xenova/gpt-4o", None).expect("model available"),
     ),
     ]
 });
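
These statics panic on first access if setup fails, so the expect() messages are the only diagnostic that surfaces. A hedged sketch of that behavior with an illustrative static (GREETING is hypothetical, not part of the benchmark crate):

use std::sync::LazyLock;

// Illustrative static in the spirit of TOKENIZERS: the closure runs once,
// on first dereference, and any panic carries the expect() message.
static GREETING: LazyLock<String> = LazyLock::new(|| {
    let raw: &[u8] = b"hello";
    String::from_utf8(raw.to_vec()).expect("valid utf-8 data")
});

fn main() {
    // First access triggers initialization; later accesses reuse the value.
    assert_eq!(GREETING.as_str(), "hello");
}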
