Commit 03b83ec

Revert "Move torchao/_models to benchmarks/_models" (#1844)
Revert "Move torchao/_models to benchmarks/_models (#1784)"

This reverts commit 81a2813.
1 parent 4a5ab2d commit 03b83ec

File tree: 91 files changed (+425, -445 lines)


.github/workflows/dashboard_perf_test.yml

Lines changed: 5 additions & 5 deletions
@@ -42,19 +42,19 @@ jobs:
 
       mkdir -p ${{ runner.temp }}/benchmark-results
       # llama3 - compile baseline
-      ${CONDA_RUN} python benchmarks/_models/llama/generate.py --checkpoint_path "${CHECKPOINT_PATH}/${MODEL_REPO}/model.pth" --compile --compile_prefill --output_json_path ${{ runner.temp }}/benchmark-results/llama3-benchmark-results.json
+      ${CONDA_RUN} python torchao/_models/llama/generate.py --checkpoint_path "${CHECKPOINT_PATH}/${MODEL_REPO}/model.pth" --compile --compile_prefill --output_json_path ${{ runner.temp }}/benchmark-results/llama3-benchmark-results.json
 
       # llama3 - autoquant
-      ${CONDA_RUN} python benchmarks/_models/llama/generate.py --checkpoint_path "${CHECKPOINT_PATH}/${MODEL_REPO}/model.pth" --compile --compile_prefill --quantization autoquant --output_json_path ${{ runner.temp }}/benchmark-results/llama3-benchmark-results.json
+      ${CONDA_RUN} python torchao/_models/llama/generate.py --checkpoint_path "${CHECKPOINT_PATH}/${MODEL_REPO}/model.pth" --compile --compile_prefill --quantization autoquant --output_json_path ${{ runner.temp }}/benchmark-results/llama3-benchmark-results.json
 
       # skipping SAM because of https://hud.pytorch.org/pr/pytorch/ao/1407
       # # SAM
       # ${CONDA_RUN} pip install git+https://github.com/pytorch-labs/segment-anything-fast.git@main
       # # SAM compile baselilne
-      # ${CONDA_RUN} sh benchmarks/_models/sam/setup.sh
-      # ${CONDA_RUN} python benchmarks/_models/sam/eval_combo.py --coco_root_dir datasets/coco2017 --coco_slice_name val2017 --sam_checkpoint_base_path checkpoints --sam_model_type vit_h --point_sampling_cache_dir tmp/sam_coco_mask_center_cache --mask_debug_out_dir tmp/sam_eval_masks_out --batch_size 32 --num_workers 8 --use_compile max-autotune --use_half bfloat16 --device cuda --output_json_path ${{ runner.temp }}/benchmark-results/sam-benchmark-results.json
+      # ${CONDA_RUN} sh torchao/_models/sam/setup.sh
+      # ${CONDA_RUN} python torchao/_models/sam/eval_combo.py --coco_root_dir datasets/coco2017 --coco_slice_name val2017 --sam_checkpoint_base_path checkpoints --sam_model_type vit_h --point_sampling_cache_dir tmp/sam_coco_mask_center_cache --mask_debug_out_dir tmp/sam_eval_masks_out --batch_size 32 --num_workers 8 --use_compile max-autotune --use_half bfloat16 --device cuda --output_json_path ${{ runner.temp }}/benchmark-results/sam-benchmark-results.json
 
-      # ${CONDA_RUN} python benchmarks/_models/sam/eval_combo.py --coco_root_dir datasets/coco2017 --coco_slice_name val2017 --sam_checkpoint_base_path checkpoints --sam_model_type vit_h --point_sampling_cache_dir tmp/sam_coco_mask_center_cache --mask_debug_out_dir tmp/sam_eval_masks_out --batch_size 32 --num_workers 8 --use_compile max-autotune --use_half bfloat16 --device cuda --compression autoquant --output_json_path ${{ runner.temp }}/benchmark-results/sam-benchmark-results.json
+      # ${CONDA_RUN} python torchao/_models/sam/eval_combo.py --coco_root_dir datasets/coco2017 --coco_slice_name val2017 --sam_checkpoint_base_path checkpoints --sam_model_type vit_h --point_sampling_cache_dir tmp/sam_coco_mask_center_cache --mask_debug_out_dir tmp/sam_eval_masks_out --batch_size 32 --num_workers 8 --use_compile max-autotune --use_half bfloat16 --device cuda --compression autoquant --output_json_path ${{ runner.temp }}/benchmark-results/sam-benchmark-results.json
 
       # SAM 2.1
       # ${CONDA_RUN} sh scripts/download_sam2_ckpts.sh ${CHECKPOINT_PATH}/sam2
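
For context, the --quantization autoquant runs above exercise torchao's autoquant API. A minimal sketch of that flow, hedged: the toy nn.Linear stands in for the llama checkpoint, and exact entry points can vary across torchao releases.

# Hedged sketch of the autoquant flow the workflow benchmarks: autoquant
# wraps the compiled model and picks per-layer quantization on the first run.
import torch
import torchao

model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).cuda()
model = torchao.autoquant(torch.compile(model, mode="max-autotune"))
model(torch.randn(16, 1024, device="cuda"))  # first call triggers the autotuning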

README.md

Lines changed: 2 additions & 2 deletions
@@ -19,7 +19,7 @@ torchao just works with `torch.compile()` and `FSDP2` over most PyTorch models o
 
 ### Post Training Quantization
 
-Quantizing and Sparsifying your models is a 1 liner that should work on any model with an `nn.Linear` including your favorite HuggingFace model. You can find a more comprehensive usage instructions [here](torchao/quantization/), sparsity [here](/benchmarks/_models/sam/README.md) and a HuggingFace inference example [here](scripts/hf_eval.py)
+Quantizing and Sparsifying your models is a 1 liner that should work on any model with an `nn.Linear` including your favorite HuggingFace model. You can find a more comprehensive usage instructions [here](torchao/quantization/), sparsity [here](/torchao/_models/sam/README.md) and a HuggingFace inference example [here](scripts/hf_eval.py)
 
 For inference, we have the option of
 1. Quantize only the weights: works best for memory bound models
@@ -52,7 +52,7 @@ We also provide a developer facing API so you can implement your own quantizatio
 
 We've added kv cache quantization and other features in order to enable long context length (and necessarily memory efficient) inference.
 
-In practice these features alongside int4 weight only quantization allow us to **reduce peak memory by ~55%**, meaning we can Llama3.1-8B inference with a **130k context length with only 18.9 GB of peak memory.** More details can be found [here](benchmarks/_models/llama/README.md)
+In practice these features alongside int4 weight only quantization allow us to **reduce peak memory by ~55%**, meaning we can Llama3.1-8B inference with a **130k context length with only 18.9 GB of peak memory.** More details can be found [here](torchao/_models/llama/README.md)
 
 ## Training
 
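The "1 liner" this README hunk mentions is torchao's quantize_ API. A minimal sketch, assuming a reasonably recent torchao; int8_weight_only is just one of several configurations and exact names vary by release.

# Hedged sketch of the one-line quantization the README describes.
import torch
from torchao.quantization import quantize_, int8_weight_only

model = torch.nn.Sequential(torch.nn.Linear(1024, 1024))
quantize_(model, int8_weight_only())  # rewrites the nn.Linear weights in place
print(model(torch.randn(1, 1024)).shape)
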
benchmarks/_models/llama/__init__.py

Whitespace-only changes.

benchmarks/_models/sam/__init__.py

Whitespace-only changes.

benchmarks/quantized_training/pretrain_llama2.py

Lines changed: 2 additions & 2 deletions
@@ -22,13 +22,13 @@
 from torch.utils.checkpoint import checkpoint
 from tqdm import tqdm
 
-from benchmarks._models.llama.model import (
+from torchao import quantize_
+from torchao._models.llama.model import (
     ModelArgs,
     RMSNorm,
     Transformer,
     transformer_configs,
 )
-from torchao import quantize_
 from torchao.prototype import low_bit_optim
 from torchao.prototype.quantized_training import (
     bitnet_training,
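
For orientation, the imports being reordered above come from torchao's vendored gpt-fast-style llama definition; a hedged sketch of how they fit together (the tiny config is illustrative, and the real script selects presets from transformer_configs and runs a full pretraining loop).

# Hedged sketch of the imported names; field names follow the gpt-fast
# ModelArgs dataclass, and the toy sizes are purely illustrative.
from torchao._models.llama.model import ModelArgs, Transformer, transformer_configs

print(sorted(transformer_configs))  # available presets, e.g. "7B"
config = ModelArgs(n_layer=2, n_head=4, dim=256, vocab_size=1024)
model = Transformer(config)
print(sum(p.numel() for p in model.parameters()))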

docs/source/contributor_guide.rst

Lines changed: 5 additions & 5 deletions
@@ -125,11 +125,11 @@ After you have the quantization flow implemented, you can run benchmark and eval
 
 Note: llama model (llama2/llama3) is our representative model for memory bound models and sam is our representative model for compute bound models.
 
-* `llama <https://github.com/pytorch/ao/tree/main/benchmarks/_models/llama>`__
-  * `benchmark <https://github.com/pytorch/ao/blob/main/benchmarks/_models/llama/generate.py>`__
-  * `eval <https://github.com/pytorch/ao/blob/main/benchmarks/_models/llama/eval.py>`__
-* `sam <https://github.com/pytorch/ao/tree/main/benchmarks/_models/sam>`__
-  * `benchmark and eval <https://github.com/pytorch/ao/blob/main/benchmarks/_models/sam/eval_combo.py>`__
+* `llama <https://github.com/pytorch/ao/tree/main/torchao/_models/llama>`__
+  * `benchmark <https://github.com/pytorch/ao/blob/main/torchao/_models/llama/generate.py>`__
+  * `eval <https://github.com/pytorch/ao/blob/main/torchao/_models/llama/eval.py>`__
+* `sam <https://github.com/pytorch/ao/tree/main/torchao/_models/sam>`__
+  * `benchmark and eval <https://github.com/pytorch/ao/blob/main/torchao/_models/sam/eval_combo.py>`__
 
 Please checkout the ``--help`` option for each of the script to understand the supported options, e.g. you can use ``--profile=profile_path`` to get the chrome trace of the run to understand detailed `chrome trace <https://pytorch.org/tutorials/recipes/recipes/profiler_recipe.html#using-tracing-functionality>`__.
 
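
For reference, the chrome traces those scripts emit come from the PyTorch profiler; a minimal hedged sketch of the standard recipe (not the scripts' exact wiring behind --profile).

# Hedged sketch: the standard torch.profiler recipe for a chrome trace,
# viewable in chrome://tracing or Perfetto.
import torch
from torch.profiler import ProfilerActivity, profile

model = torch.nn.Linear(512, 512)
with profile(activities=[ProfilerActivity.CPU]) as prof:
    model(torch.randn(8, 512))
prof.export_chrome_trace("profile_path.json")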

examples/sam2_amg_server/annotate_with_rle.py

Lines changed: 1 addition & 1 deletion
@@ -14,7 +14,7 @@
 )
 from tqdm import tqdm
 
-from benchmarks._models.sam2.utils.amg import area_from_rle, rle_to_mask
+from torchao._models.sam2.utils.amg import area_from_rle, rle_to_mask
 
 
 def timestamped_print(*args, **kwargs):
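
area_from_rle, touched by the import above, is tiny under the COCO uncompressed-RLE convention; a hedged sketch (runs alternate background/foreground, so the odd-indexed counts make up the mask area).

# Hedged sketch of area_from_rle: counts alternate 0-runs and 1-runs,
# so summing the odd-indexed counts gives the foreground area.
def area_from_rle(rle):
    return sum(rle["counts"][1::2])

print(area_from_rle({"size": [2, 3], "counts": [2, 3, 1]}))  # -> 3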

examples/sam2_amg_server/cli.py

Lines changed: 3 additions & 3 deletions
@@ -12,9 +12,9 @@
     show_anns,
 )
 
-from benchmarks._models.sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
-from benchmarks._models.sam2.build_sam import build_sam2
-from benchmarks._models.sam2.utils.amg import rle_to_mask
+from torchao._models.sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
+from torchao._models.sam2.build_sam import build_sam2
+from torchao._models.sam2.utils.amg import rle_to_mask
 
 
 def main_docstring():
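
The three imports above support the usual automatic-mask-generation flow; a hedged sketch, where the config and checkpoint paths are placeholders and keyword arguments may differ by sam2 version.

# Hedged sketch of the flow cli.py's imports support; paths are placeholders.
import numpy as np

from torchao._models.sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
from torchao._models.sam2.build_sam import build_sam2
from torchao._models.sam2.utils.amg import rle_to_mask

sam2 = build_sam2("configs/sam2.1/sam2.1_hiera_l.yaml", "checkpoints/sam2.1_hiera_large.pt")
generator = SAM2AutomaticMaskGenerator(sam2, output_mode="uncompressed_rle")
masks = generator.generate(np.zeros((256, 256, 3), dtype=np.uint8))  # HWC uint8 image
binary = [rle_to_mask(m["segmentation"]) for m in masks]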

examples/sam2_amg_server/cli_on_modal.py

Lines changed: 4 additions & 4 deletions
@@ -84,10 +84,10 @@ def build(self):
             from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
             from sam2.build_sam import build_sam2
         else:
-            from benchmarks._models.sam2.automatic_mask_generator import (
+            from torchao._models.sam2.automatic_mask_generator import (
                 SAM2AutomaticMaskGenerator,
             )
-            from benchmarks._models.sam2.build_sam import build_sam2
+            from torchao._models.sam2.build_sam import build_sam2
 
         os.chdir(f"{TARGET}ao_src_0/examples/sam2_amg_server")
         import sys
@@ -139,11 +139,11 @@ def build(self):
             from sam2.utils.amg import mask_to_rle_pytorch as mask_to_rle_pytorch_2
             from sam2.utils.amg import rle_to_mask
         else:
-            from benchmarks._models.sam2.utils.amg import (
+            from torchao._models.sam2.utils.amg import (
                 mask_to_rle_pytorch_2,
                 rle_to_mask,
             )
-            from benchmarks._models.sam2.utils.amg import area_from_rle
+            from torchao._models.sam2.utils.amg import area_from_rle
 
         self.np = np
         self.tio = tio

examples/sam2_amg_server/compare_rle_lists.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 import torch
 
 
-# from benchmarks._models.sam2.utils.amg import rle_to_mask
+# from torchao._models.sam2.utils.amg import rle_to_mask
 def rle_to_mask(rle: Dict[str, Any]) -> np.ndarray:
     """Compute a binary mask from an uncompressed RLE."""
     h, w = rle["size"]
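
The hunk truncates rle_to_mask; a hedged reconstruction of the column-major COCO-style decode it implements, with a tiny worked example.

# Hedged sketch of the uncompressed-RLE decode: counts alternate 0-runs and
# 1-runs over the mask flattened in column-major (Fortran) order.
from typing import Any, Dict

import numpy as np

def rle_to_mask(rle: Dict[str, Any]) -> np.ndarray:
    """Compute a binary mask from an uncompressed RLE."""
    h, w = rle["size"]
    mask = np.empty(h * w, dtype=bool)
    idx, parity = 0, False
    for count in rle["counts"]:
        mask[idx : idx + count] = parity
        idx += count
        parity = not parity
    return mask.reshape(w, h).transpose()  # back to (h, w) in C order

print(rle_to_mask({"size": [2, 3], "counts": [2, 3, 1]}).astype(int))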
