Commit 03b83ec

Revert "Move torchao/_models to benchmarks/_models" (#1844)
Revert "Move torchao/_models to benchmarks/_models (#1784)"

This reverts commit 81a2813.
1 parent 4a5ab2d commit 03b83ec

File tree: 91 files changed (+425, -445 lines)


.github/workflows/dashboard_perf_test.yml

Lines changed: 5 additions & 5 deletions
@@ -42,19 +42,19 @@ jobs:
 
       mkdir -p ${{ runner.temp }}/benchmark-results
       # llama3 - compile baseline
-      ${CONDA_RUN} python benchmarks/_models/llama/generate.py --checkpoint_path "${CHECKPOINT_PATH}/${MODEL_REPO}/model.pth" --compile --compile_prefill --output_json_path ${{ runner.temp }}/benchmark-results/llama3-benchmark-results.json
+      ${CONDA_RUN} python torchao/_models/llama/generate.py --checkpoint_path "${CHECKPOINT_PATH}/${MODEL_REPO}/model.pth" --compile --compile_prefill --output_json_path ${{ runner.temp }}/benchmark-results/llama3-benchmark-results.json
 
       # llama3 - autoquant
-      ${CONDA_RUN} python benchmarks/_models/llama/generate.py --checkpoint_path "${CHECKPOINT_PATH}/${MODEL_REPO}/model.pth" --compile --compile_prefill --quantization autoquant --output_json_path ${{ runner.temp }}/benchmark-results/llama3-benchmark-results.json
+      ${CONDA_RUN} python torchao/_models/llama/generate.py --checkpoint_path "${CHECKPOINT_PATH}/${MODEL_REPO}/model.pth" --compile --compile_prefill --quantization autoquant --output_json_path ${{ runner.temp }}/benchmark-results/llama3-benchmark-results.json
 
       # skipping SAM because of https://hud.pytorch.org/pr/pytorch/ao/1407
       # # SAM
       # ${CONDA_RUN} pip install git+https://github.com/pytorch-labs/segment-anything-fast.git@main
       # # SAM compile baselilne
-      # ${CONDA_RUN} sh benchmarks/_models/sam/setup.sh
-      # ${CONDA_RUN} python benchmarks/_models/sam/eval_combo.py --coco_root_dir datasets/coco2017 --coco_slice_name val2017 --sam_checkpoint_base_path checkpoints --sam_model_type vit_h --point_sampling_cache_dir tmp/sam_coco_mask_center_cache --mask_debug_out_dir tmp/sam_eval_masks_out --batch_size 32 --num_workers 8 --use_compile max-autotune --use_half bfloat16 --device cuda --output_json_path ${{ runner.temp }}/benchmark-results/sam-benchmark-results.json
+      # ${CONDA_RUN} sh torchao/_models/sam/setup.sh
+      # ${CONDA_RUN} python torchao/_models/sam/eval_combo.py --coco_root_dir datasets/coco2017 --coco_slice_name val2017 --sam_checkpoint_base_path checkpoints --sam_model_type vit_h --point_sampling_cache_dir tmp/sam_coco_mask_center_cache --mask_debug_out_dir tmp/sam_eval_masks_out --batch_size 32 --num_workers 8 --use_compile max-autotune --use_half bfloat16 --device cuda --output_json_path ${{ runner.temp }}/benchmark-results/sam-benchmark-results.json
 
-      # ${CONDA_RUN} python benchmarks/_models/sam/eval_combo.py --coco_root_dir datasets/coco2017 --coco_slice_name val2017 --sam_checkpoint_base_path checkpoints --sam_model_type vit_h --point_sampling_cache_dir tmp/sam_coco_mask_center_cache --mask_debug_out_dir tmp/sam_eval_masks_out --batch_size 32 --num_workers 8 --use_compile max-autotune --use_half bfloat16 --device cuda --compression autoquant --output_json_path ${{ runner.temp }}/benchmark-results/sam-benchmark-results.json
+      # ${CONDA_RUN} python torchao/_models/sam/eval_combo.py --coco_root_dir datasets/coco2017 --coco_slice_name val2017 --sam_checkpoint_base_path checkpoints --sam_model_type vit_h --point_sampling_cache_dir tmp/sam_coco_mask_center_cache --mask_debug_out_dir tmp/sam_eval_masks_out --batch_size 32 --num_workers 8 --use_compile max-autotune --use_half bfloat16 --device cuda --compression autoquant --output_json_path ${{ runner.temp }}/benchmark-results/sam-benchmark-results.json
 
       # SAM 2.1
       # ${CONDA_RUN} sh scripts/download_sam2_ckpts.sh ${CHECKPOINT_PATH}/sam2
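
For context, the --quantization autoquant runs above exercise torchao's autoquant API. A minimal sketch of that flow, hedged: the toy nn.Linear stands in for the llama checkpoint, and exact entry points can vary across torchao releases.

# Hedged sketch of the autoquant flow the workflow benchmarks: autoquant
# wraps the compiled model and picks per-layer quantization on the first run.
import torch
import torchao

model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).cuda()
model = torchao.autoquant(torch.compile(model, mode="max-autotune"))
model(torch.randn(16, 1024, device="cuda"))  # first call triggers the autotuning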

README.md

Lines changed: 2 additions & 2 deletions
@@ -19,7 +19,7 @@ torchao just works with `torch.compile()` and `FSDP2` over most PyTorch models o
 
 ### Post Training Quantization
 
-Quantizing and Sparsifying your models is a 1 liner that should work on any model with an `nn.Linear` including your favorite HuggingFace model. You can find a more comprehensive usage instructions [here](torchao/quantization/), sparsity [here](/benchmarks/_models/sam/README.md) and a HuggingFace inference example [here](scripts/hf_eval.py)
+Quantizing and Sparsifying your models is a 1 liner that should work on any model with an `nn.Linear` including your favorite HuggingFace model. You can find a more comprehensive usage instructions [here](torchao/quantization/), sparsity [here](/torchao/_models/sam/README.md) and a HuggingFace inference example [here](scripts/hf_eval.py)
 
 For inference, we have the option of
 1. Quantize only the weights: works best for memory bound models
@@ -52,7 +52,7 @@ We also provide a developer facing API so you can implement your own quantizatio
 
 We've added kv cache quantization and other features in order to enable long context length (and necessarily memory efficient) inference.
 
-In practice these features alongside int4 weight only quantization allow us to **reduce peak memory by ~55%**, meaning we can Llama3.1-8B inference with a **130k context length with only 18.9 GB of peak memory.** More details can be found [here](benchmarks/_models/llama/README.md)
+In practice these features alongside int4 weight only quantization allow us to **reduce peak memory by ~55%**, meaning we can Llama3.1-8B inference with a **130k context length with only 18.9 GB of peak memory.** More details can be found [here](torchao/_models/llama/README.md)
 
 ## Training
 
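The "1 liner" this README hunk mentions is torchao's quantize_ API. A minimal sketch, assuming a reasonably recent torchao; int8_weight_only is just one of several configurations and exact names vary by release.

# Hedged sketch of the one-line quantization the README describes.
import torch
from torchao.quantization import quantize_, int8_weight_only

model = torch.nn.Sequential(torch.nn.Linear(1024, 1024))
quantize_(model, int8_weight_only())  # rewrites the nn.Linear weights in place
print(model(torch.randn(1, 1024)).shape)
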
benchmarks/_models/llama/__init__.py

Whitespace-only changes.

benchmarks/_models/sam/__init__.py

Whitespace-only changes.

benchmarks/quantized_training/pretrain_llama2.py

Lines changed: 2 additions & 2 deletions
@@ -22,13 +22,13 @@
 from torch.utils.checkpoint import checkpoint
 from tqdm import tqdm
 
-from benchmarks._models.llama.model import (
+from torchao import quantize_
+from torchao._models.llama.model import (
     ModelArgs,
     RMSNorm,
     Transformer,
     transformer_configs,
 )
-from torchao import quantize_
 from torchao.prototype import low_bit_optim
 from torchao.prototype.quantized_training import (
     bitnet_training,
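
For orientation, the imports being reordered above come from torchao's vendored gpt-fast-style llama definition; a hedged sketch of how they fit together (the tiny config is illustrative, and the real script selects presets from transformer_configs and runs a full pretraining loop).

# Hedged sketch of the imported names; field names follow the gpt-fast
# ModelArgs dataclass, and the toy sizes are purely illustrative.
from torchao._models.llama.model import ModelArgs, Transformer, transformer_configs

print(sorted(transformer_configs))  # available presets, e.g. "7B"
config = ModelArgs(n_layer=2, n_head=4, dim=256, vocab_size=1024)
model = Transformer(config)
print(sum(p.numel() for p in model.parameters()))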

docs/source/contributor_guide.rst

Lines changed: 5 additions & 5 deletions
@@ -125,11 +125,11 @@ After you have the quantization flow implemented, you can run benchmark and eval
 
 Note: llama model (llama2/llama3) is our representative model for memory bound models and sam is our representative model for compute bound models.
 
-* `llama <https://github.com/pytorch/ao/tree/main/benchmarks/_models/llama>`__
-  * `benchmark <https://github.com/pytorch/ao/blob/main/benchmarks/_models/llama/generate.py>`__
-  * `eval <https://github.com/pytorch/ao/blob/main/benchmarks/_models/llama/eval.py>`__
-* `sam <https://github.com/pytorch/ao/tree/main/benchmarks/_models/sam>`__
-  * `benchmark and eval <https://github.com/pytorch/ao/blob/main/benchmarks/_models/sam/eval_combo.py>`__
+* `llama <https://github.com/pytorch/ao/tree/main/torchao/_models/llama>`__
+  * `benchmark <https://github.com/pytorch/ao/blob/main/torchao/_models/llama/generate.py>`__
+  * `eval <https://github.com/pytorch/ao/blob/main/torchao/_models/llama/eval.py>`__
+* `sam <https://github.com/pytorch/ao/tree/main/torchao/_models/sam>`__
+  * `benchmark and eval <https://github.com/pytorch/ao/blob/main/torchao/_models/sam/eval_combo.py>`__
 
 Please checkout the ``--help`` option for each of the script to understand the supported options, e.g. you can use ``--profile=profile_path`` to get the chrome trace of the run to understand detailed `chrome trace <https://pytorch.org/tutorials/recipes/recipes/profiler_recipe.html#using-tracing-functionality>`__.
 
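
For reference, the chrome traces those scripts emit come from the PyTorch profiler; a minimal hedged sketch of the standard recipe (not the scripts' exact wiring behind --profile).

# Hedged sketch: the standard torch.profiler recipe for a chrome trace,
# viewable in chrome://tracing or Perfetto.
import torch
from torch.profiler import ProfilerActivity, profile

model = torch.nn.Linear(512, 512)
with profile(activities=[ProfilerActivity.CPU]) as prof:
    model(torch.randn(8, 512))
prof.export_chrome_trace("profile_path.json")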

examples/sam2_amg_server/annotate_with_rle.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@
 )
 from tqdm import tqdm
 
-from benchmarks._models.sam2.utils.amg import area_from_rle, rle_to_mask
+from torchao._models.sam2.utils.amg import area_from_rle, rle_to_mask
 
 
 def timestamped_print(*args, **kwargs):
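
area_from_rle, touched by the import above, is tiny under the COCO uncompressed-RLE convention; a hedged sketch (runs alternate background/foreground, so the odd-indexed counts make up the mask area).

# Hedged sketch of area_from_rle: counts alternate 0-runs and 1-runs,
# so summing the odd-indexed counts gives the foreground area.
def area_from_rle(rle):
    return sum(rle["counts"][1::2])

print(area_from_rle({"size": [2, 3], "counts": [2, 3, 1]}))  # -> 3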

examples/sam2_amg_server/cli.py

Lines changed: 3 additions & 3 deletions
@@ -12,9 +12,9 @@
     show_anns,
 )
 
-from benchmarks._models.sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
-from benchmarks._models.sam2.build_sam import build_sam2
-from benchmarks._models.sam2.utils.amg import rle_to_mask
+from torchao._models.sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
+from torchao._models.sam2.build_sam import build_sam2
+from torchao._models.sam2.utils.amg import rle_to_mask
 
 
 def main_docstring():
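
The three imports above support the usual automatic-mask-generation flow; a hedged sketch, where the config and checkpoint paths are placeholders and keyword arguments may differ by sam2 version.

# Hedged sketch of the flow cli.py's imports support; paths are placeholders.
import numpy as np

from torchao._models.sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
from torchao._models.sam2.build_sam import build_sam2
from torchao._models.sam2.utils.amg import rle_to_mask

sam2 = build_sam2("configs/sam2.1/sam2.1_hiera_l.yaml", "checkpoints/sam2.1_hiera_large.pt")
generator = SAM2AutomaticMaskGenerator(sam2, output_mode="uncompressed_rle")
masks = generator.generate(np.zeros((256, 256, 3), dtype=np.uint8))  # HWC uint8 image
binary = [rle_to_mask(m["segmentation"]) for m in masks]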

examples/sam2_amg_server/cli_on_modal.py

Lines changed: 4 additions & 4 deletions
@@ -84,10 +84,10 @@ def build(self):
             from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
             from sam2.build_sam import build_sam2
         else:
-            from benchmarks._models.sam2.automatic_mask_generator import (
+            from torchao._models.sam2.automatic_mask_generator import (
                 SAM2AutomaticMaskGenerator,
             )
-            from benchmarks._models.sam2.build_sam import build_sam2
+            from torchao._models.sam2.build_sam import build_sam2
 
         os.chdir(f"{TARGET}ao_src_0/examples/sam2_amg_server")
         import sys
@@ -139,11 +139,11 @@ def build(self):
             from sam2.utils.amg import mask_to_rle_pytorch as mask_to_rle_pytorch_2
             from sam2.utils.amg import rle_to_mask
         else:
-            from benchmarks._models.sam2.utils.amg import (
+            from torchao._models.sam2.utils.amg import (
                 mask_to_rle_pytorch_2,
                 rle_to_mask,
             )
-            from benchmarks._models.sam2.utils.amg import area_from_rle
+            from torchao._models.sam2.utils.amg import area_from_rle
 
         self.np = np
         self.tio = tio

examples/sam2_amg_server/compare_rle_lists.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 import torch
 
 
-# from benchmarks._models.sam2.utils.amg import rle_to_mask
+# from torchao._models.sam2.utils.amg import rle_to_mask
 def rle_to_mask(rle: Dict[str, Any]) -> np.ndarray:
     """Compute a binary mask from an uncompressed RLE."""
     h, w = rle["size"]
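
The hunk truncates rle_to_mask; a hedged reconstruction of the column-major COCO-style decode it implements, with a tiny worked example.

# Hedged sketch of the uncompressed-RLE decode: counts alternate 0-runs and
# 1-runs over the mask flattened in column-major (Fortran) order.
from typing import Any, Dict

import numpy as np

def rle_to_mask(rle: Dict[str, Any]) -> np.ndarray:
    """Compute a binary mask from an uncompressed RLE."""
    h, w = rle["size"]
    mask = np.empty(h * w, dtype=bool)
    idx, parity = 0, False
    for count in rle["counts"]:
        mask[idx : idx + count] = parity
        idx += count
        parity = not parity
    return mask.reshape(w, h).transpose()  # back to (h, w) in C order

print(rle_to_mask({"size": [2, 3], "counts": [2, 3, 1]}).astype(int))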
