From f0102512584cd756afe8edd2a8c190fb34ce790e Mon Sep 17 00:00:00 2001
From: stephantul
Date: Tue, 27 May 2025 10:34:32 +0200
Subject: [PATCH] clean-up print statement

---
 model2vec/model.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/model2vec/model.py b/model2vec/model.py
index 9e186e1..200b95d 100644
--- a/model2vec/model.py
+++ b/model2vec/model.py
@@ -130,8 +130,6 @@ def tokenize(self, sentences: Sequence[str], max_length: int | None = None) -> l
             m = max_length * self.median_token_length
             sentences = [sentence[:m] for sentence in sentences]
 
-        max_len = max([len(sentence) for sentence in sentences])
-        # self.tokenizer.model.max_input_chars_per_word = max_len + 1
         if self._can_encode_fast:
             encodings: list[Encoding] = self.tokenizer.encode_batch_fast(sentences, add_special_tokens=False)
         else:
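
Note for reviewers: the removed `max_len` computation was dead code, since the
only line that consumed it (setting `max_input_chars_per_word`) was already
commented out. For context, a minimal sketch of the remaining tokenize flow is
below, assuming `median_token_length` estimates characters per token and
`_can_encode_fast` gates the batched fast path; the names are taken from the
diff context and the full method body is not shown here, so the else branch is
an assumption:

    from typing import Sequence
    from tokenizers import Encoding

    def tokenize(self, sentences: Sequence[str], max_length: int | None = None) -> list[Encoding]:
        # Pre-truncate each sentence to roughly max_length tokens' worth of
        # characters, so the tokenizer never sees pathologically long input.
        if max_length is not None:
            m = max_length * self.median_token_length
            sentences = [sentence[:m] for sentence in sentences]

        # Use the batched fast path when the tokenizer supports it;
        # fall back to the regular batch encoder otherwise (assumed fallback).
        if self._can_encode_fast:
            encodings: list[Encoding] = self.tokenizer.encode_batch_fast(sentences, add_special_tokens=False)
        else:
            encodings = self.tokenizer.encode_batch(sentences, add_special_tokens=False)
        return encodings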