diff --git a/src/lib.rs b/src/lib.rs index 0203acf..047022d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,6 @@ // This check is new and seems buggy (possibly with PyO3 interaction) #![allow(clippy::borrow_deref_ref)] -use std::collections::HashSet; use std::num::NonZeroU64; use std::thread; @@ -12,6 +11,7 @@ use pyo3::pybacked::PyBackedStr; use pyo3::types::{PyBytes, PyList, PyTuple}; use pyo3::PyResult; use rustc_hash::FxHashMap as HashMap; +use rustc_hash::FxHashSet as HashSet; type Rank = u32; @@ -317,7 +317,7 @@ impl CoreBPE { if last_piece_token_len == 0 { // If last_piece_token_len is zero, the last token was a special token and we have // no unstable bytes - return (tokens, HashSet::new()); + return (tokens, HashSet::default()); } let (mut tokens, last_piece_token_len) = self._increase_last_piece_token_len(tokens, last_piece_token_len); @@ -331,7 +331,7 @@ impl CoreBPE { // This would reduce the amount of retokenising when determining completions // Refer to the logic in an older version of this file - let mut completions = HashSet::new(); + let mut completions = HashSet::default(); if unstable_bytes.is_empty() { return (tokens, completions); } @@ -514,7 +514,7 @@ impl CoreBPE { Ok(text) => self._encode_ordinary_native(text), Err(e) => { let text = unsafe { std::str::from_utf8_unchecked(&bytes[..e.valid_up_to()]) }; - let (tokens, last_piece_token_len) = self._encode_native(text, &HashSet::new()); + let (tokens, last_piece_token_len) = self._encode_native(text, &HashSet::default()); let (mut tokens, last_piece_token_len) = self._increase_last_piece_token_len(tokens, last_piece_token_len); if !tokens.is_empty() && last_piece_token_len > 0 {