File tree Expand file tree Collapse file tree 2 files changed +1
-4
lines changed Expand file tree Collapse file tree 2 files changed +1
-4
lines changed Original file line number Diff line number Diff line change @@ -408,14 +408,11 @@ def build_hf_tokenizer(
408
408
) -> Union [HuggingFaceTokenizer , BaseTokenizer ]:
409
409
"""
410
410
Builds a HuggingFaceTokenizer from the specified path.
411
-
412
411
This function creates a HuggingFaceTokenizer instance that handles BOS/EOS token
413
412
inference and intelligent encoding. The tokenizer automatically detects and loads
414
413
from various file formats and infers special token behavior.
415
-
416
414
Args:
417
415
JobConfig: A JobConfig object containing the path to the tokenizer directory.
418
-
419
416
Returns:
420
417
tokenizer (HuggingFaceTokenizer): Loaded tokenizer instance with intelligent BOS/EOS handling
421
418
"""
Original file line number Diff line number Diff line change @@ -128,7 +128,7 @@ def __init__(self, job_config: JobConfig):
128
128
129
129
# build dataloader
130
130
tokenizer = (
131
- self .train_spec .build_tokenizer_fn (job_config . model . tokenizer_path )
131
+ self .train_spec .build_tokenizer_fn (job_config )
132
132
if self .train_spec .build_tokenizer_fn is not None
133
133
else None
134
134
)
You can’t perform that action at this time.
0 commit comments