
Commit bdaad8e

Guang Yang authored and facebook-github-bot committed
Add dtype arg to the script for exporting HuggingFace models (#5716)
Summary: As titled. This unblocks delegating to XNNPACK with float16 and bfloat16, which provides comparable perf data points against eager, torch.compile, AOTI, etc. Pull Request resolved: #5716 Reviewed By: kirklandsign Differential Revision: D63499648 Pulled By: guangy10 fbshipit-source-id: 5a06454f8af664e6d5f469dcf63869ca7c57a6ba
1 parent c1c5080 commit bdaad8e

File tree

1 file changed: +10 −1 lines changed


extension/export_util/export_hf_model.py

Lines changed: 10 additions & 1 deletion
@@ -27,6 +27,14 @@ def main() -> None:
         default=None,
         help="a valid huggingface model repo name",
     )
+    parser.add_argument(
+        "-d",
+        "--dtype",
+        type=str,
+        choices=["float32", "float16", "bfloat16"],
+        default="float32",
+        help="specify the dtype for loading the model",
+    )
     parser.add_argument(
         "-o",
         "--output_name",
@@ -39,7 +47,8 @@ def main() -> None:

     # Configs to HF model
     device = "cpu"
-    dtype = torch.float32
+    # TODO: remove getattr once https://github.com/huggingface/transformers/pull/33741 is merged
+    dtype = getattr(torch, args.dtype)
     batch_size = 1
     max_length = 123
     cache_implementation = "static"
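The pattern in this diff (accepting a dtype name as a CLI string and resolving it to a `torch.dtype` via `getattr`) can be sketched as a minimal standalone script. This is an illustrative sketch, not the full `export_hf_model.py`; only the `--dtype` flag and the `getattr` lookup come from the diff itself.

```python
import argparse

import torch

# Minimal sketch of the dtype flag added in this commit.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-d",
    "--dtype",
    type=str,
    choices=["float32", "float16", "bfloat16"],
    default="float32",
    help="specify the dtype for loading the model",
)
# Parse a sample command line for demonstration.
args = parser.parse_args(["--dtype", "bfloat16"])

# getattr maps the string name onto the matching torch dtype object,
# e.g. "bfloat16" -> torch.bfloat16, "float32" -> torch.float32.
dtype = getattr(torch, args.dtype)
print(dtype)  # torch.bfloat16
```

Because `choices` restricts the input to valid dtype names, the `getattr` lookup cannot fail at runtime; the linked transformers PR would make this string-to-dtype mapping unnecessary on the library side.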

0 commit comments
