pytorch · H-Huang · Jul 11, 2025 · Jul 11, 2025
@@ -104,7 +104,7 @@ Once you have confirmed access, you can run the following command to download th
 # Get your HF token from https://huggingface.co/settings/tokens
 
 # Llama 3.1 tokenizer
-python scripts/download_tokenizer.py --repo_id meta-llama/Meta-Llama-3.1-8B --hf_token=...
+python scripts/download_tokenizer.py --repo_id meta-llama/Llama-3.1-8B --hf_token=...
 ```
 
 ### Start a training run

@@ -32,7 +32,7 @@ def download_hf_tokenizer_files(
     - special_tokens_map.json - Special token mappings
 
     Args:
-        repo_id (str): HuggingFace repository ID (e.g., "meta-llama/Meta-Llama-3.1-8B")
+        repo_id (str): HuggingFace repository ID (e.g., "meta-llama/Llama-3.1-8B")
         local_dir (str): Local directory to save tokenizer files. A subdirectory
                         named after the model will be created automatically.
         hf_token (Optional[str]): HuggingFace API token for accessing private repositories.
@@ -141,7 +141,7 @@ def is_tokenizer_file(filename: str) -> bool:
         "--repo_id",
         type=str,
         required=True,
-        help="Repository ID to download from (e.g., 'meta-llama/Meta-Llama-3.1-8B', 'deepseek-ai/DeepSeek-V3')",
+        help="Repository ID to download from (e.g., 'meta-llama/Llama-3.1-8B', 'deepseek-ai/DeepSeek-V3')",
     )
     parser.add_argument(
         "--hf_token",

@@ -244,7 +244,7 @@ def get_added_tokens_func(tokenizer):
     @parametrize(
         "test_repo_id",
         [
-            "meta-llama/Meta-Llama-3.1-8B",
+            "meta-llama/Llama-3.1-8B",
             "deepseek-ai/DeepSeek-V3",
             # "black-forest-labs/FLUX.1-dev", TODO: load the actual tokenizer
             "Qwen/Qwen2-7B",
@@ -267,9 +267,9 @@ def test_download_and_build_tokenizer(self, test_repo_id):
                 local_dir=self.temp_dir,
             )
         except HTTPError as e:
-            if test_repo_id == "meta-llama/Meta-Llama-3.1-8B":
+            if test_repo_id == "meta-llama/Llama-3.1-8B":
                 self.skipTest(
-                    f"Could not download tokenizer files for Meta-Llama-3.1-8B: {e}"
+                    f"Could not download tokenizer files for Llama-3.1-8B: {e}"
                 )
             else:
                 raise e

@@ -891,7 +891,7 @@ def _validate_config(self) -> None:
             if self.config.model.tokenizer_path.endswith("tokenizer.model"):
                 raise Exception(
                     "You are using the old tokenizer.model, please redownload the tokenizer ",
-                    "(python scripts/download_tokenizer.py --repo_id meta-llama/Meta-Llama-3.1-8B) ",
+                    "(python scripts/download_tokenizer.py --repo_id meta-llama/Llama-3.1-8B) ",
                     " and update your config to the directory of the downloaded tokenizer.",
                 )