From 89b3fa6b49d78a8cf45eb62b641ace562e94762b Mon Sep 17 00:00:00 2001
From: Dipika Sikka
Date: Wed, 18 Jun 2025 16:25:06 +0000
Subject: [PATCH 1/4] Remove oneshot_device in tests

---
 .../finetune/test_finetune_no_recipe_custom_dataset.py         | 1 -
 .../transformers/finetune/test_finetune_without_recipe.py      | 2 --
 .../transformers/finetune/test_oneshot_and_finetune.py         | 1 -
 tests/llmcompressor/transformers/finetune/test_safetensors.py  | 3 ---
 tests/llmcompressor/transformers/gptq/test_oneshot.py          | 1 -
 tests/llmcompressor/transformers/obcq/test_consecutive_runs.py | 2 --
 .../transformers/obcq/test_mask_structure_preservation.py      | 2 --
 tests/llmcompressor/transformers/obcq/test_obcq_completion.py  | 1 -
 tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py    | 1 -
 .../transformers/obcq/test_oneshot_with_modifier.py            | 1 -
 10 files changed, 15 deletions(-)

diff --git a/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py b/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py
index 3ccc31e0d..6b525352c 100644
--- a/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py
+++ b/tests/llmcompressor/transformers/finetune/test_finetune_no_recipe_custom_dataset.py
@@ -40,7 +40,6 @@ def preprocessing_func(example):
             recipe=None,
             num_train_epochs=self.num_train_epochs,
             concatenate_data=concatenate_data,
-            oneshot_device=self.device,
             text_column="text",
             dataset_path=dataset_path,
             preprocessing_func=preprocessing_func,
diff --git a/tests/llmcompressor/transformers/finetune/test_finetune_without_recipe.py b/tests/llmcompressor/transformers/finetune/test_finetune_without_recipe.py
index 66f71f8b3..456736913 100644
--- a/tests/llmcompressor/transformers/finetune/test_finetune_without_recipe.py
+++ b/tests/llmcompressor/transformers/finetune/test_finetune_without_recipe.py
@@ -24,7 +24,6 @@ def test_finetune_without_recipe(self):
         from llmcompressor import train

         recipe_str = None
-        device = "cuda:0"
         concatenate_data = False
         max_steps = 50
@@ -38,7 +37,6 @@ def test_finetune_without_recipe(self):
             max_steps=max_steps,
             concatenate_data=concatenate_data,
             splits=splits,
-            oneshot_device=device,
         )

     def tearDown(self):
diff --git a/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py b/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py
index e08421ee0..393418953 100644
--- a/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py
+++ b/tests/llmcompressor/transformers/finetune/test_oneshot_and_finetune.py
@@ -31,7 +31,6 @@ def _test_oneshot_and_finetune(self):
             splits=splits,
             recipe=self.recipe,
             num_calibration_samples=64,
-            oneshot_device=self.device,
             dataset_config_name=self.dataset_config_name,
             concatenate_data=self.concat_txt,
             output_dir=self.output,
diff --git a/tests/llmcompressor/transformers/finetune/test_safetensors.py b/tests/llmcompressor/transformers/finetune/test_safetensors.py
index 3015710f8..ee2576f3b 100644
--- a/tests/llmcompressor/transformers/finetune/test_safetensors.py
+++ b/tests/llmcompressor/transformers/finetune/test_safetensors.py
@@ -24,7 +24,6 @@ def setUp(self):
     def test_safetensors(self):
         from llmcompressor import train

-        device = "cuda:0"
         output_dir = self.output / "output1"
         max_steps = 10
         splits = {"train": "train[:10%]"}
@@ -35,7 +34,6 @@ def test_safetensors(self):
             output_dir=output_dir,
             max_steps=max_steps,
             splits=splits,
-            oneshot_device=device,
         )

         assert os.path.exists(output_dir / "model.safetensors")
@@ -49,7 +47,6 @@ def test_safetensors(self):
             output_dir=new_output_dir,
             max_steps=max_steps,
             splits=splits,
-            oneshot_device=device,
         )

     def tearDown(self):
diff --git a/tests/llmcompressor/transformers/gptq/test_oneshot.py b/tests/llmcompressor/transformers/gptq/test_oneshot.py
index 1c1bc0b44..7c2aa114d 100644
--- a/tests/llmcompressor/transformers/gptq/test_oneshot.py
+++ b/tests/llmcompressor/transformers/gptq/test_oneshot.py
@@ -77,7 +77,6 @@ def test_oneshot_application(self):
             dataset=self.dataset,
             output_dir=self.output,
             recipe=self.recipe,
-            oneshot_device=self.device,
             num_calibration_samples=9,
         )
         model_loaded = AutoModelForCausalLM.from_pretrained(
diff --git a/tests/llmcompressor/transformers/obcq/test_consecutive_runs.py b/tests/llmcompressor/transformers/obcq/test_consecutive_runs.py
index 901a94a5e..51f0fe0f8 100644
--- a/tests/llmcompressor/transformers/obcq/test_consecutive_runs.py
+++ b/tests/llmcompressor/transformers/obcq/test_consecutive_runs.py
@@ -39,7 +39,6 @@ def _test_consecutive_runs(
             num_calibration_samples=num_calibration_samples,
             recipe=self.first_recipe,
             output_dir=self.output_first,
-            oneshot_device=self.device,
         )

         first_model = AutoModelForCausalLM.from_pretrained(
@@ -68,7 +67,6 @@ def _test_consecutive_runs(
             num_calibration_samples=num_calibration_samples,
             recipe=self.second_recipe,
             output_dir=self.output_second,
-            oneshot_device=self.device,
         )

         second_model = AutoModelForCausalLM.from_pretrained(
diff --git a/tests/llmcompressor/transformers/obcq/test_mask_structure_preservation.py b/tests/llmcompressor/transformers/obcq/test_mask_structure_preservation.py
index 1419b773e..ed26c84a2 100644
--- a/tests/llmcompressor/transformers/obcq/test_mask_structure_preservation.py
+++ b/tests/llmcompressor/transformers/obcq/test_mask_structure_preservation.py
@@ -60,7 +60,6 @@ def test_mask_structure_preserved(self):
             num_calibration_samples=num_calibration_samples,
             recipe=self.initial_pruning_only_recipe,
             output_dir=self.output_first,
-            oneshot_device=self.device,
             save_compressed=False,
         )
         targetted_layer = first_tiny_model.model.layers[0].self_attn.k_proj
@@ -82,7 +81,6 @@ def test_mask_structure_preserved(self):
             num_calibration_samples=num_calibration_samples,
             recipe=self.subsequent_prune_and_quant_recipe,
             output_dir=self.output_second,
-            oneshot_device=self.device,
             save_compressed=False,
         )

diff --git a/tests/llmcompressor/transformers/obcq/test_obcq_completion.py b/tests/llmcompressor/transformers/obcq/test_obcq_completion.py
index 9de389c51..40b0c0871 100644
--- a/tests/llmcompressor/transformers/obcq/test_obcq_completion.py
+++ b/tests/llmcompressor/transformers/obcq/test_obcq_completion.py
@@ -59,7 +59,6 @@ def _test_oneshot_completion(self, model_name: str = None):
             model=self.model,
             dataset=self.dataset,
             splits={"calibration": f"train[:{self.num_samples}]"},
-            oneshot_device=self.device,
             recipe=self.recipe,
             max_seq_length=512,
             num_calibration_samples=self.num_samples,
diff --git a/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py b/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py
index 347eb5dc0..5d95677d3 100644
--- a/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py
+++ b/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py
@@ -82,7 +82,6 @@ def test_sparsities_gpu(self):
         model = oneshot(
             model=self.model,
             dataset=self.dataset,
-            oneshot_device=self.device,
             recipe=self.recipe,
             max_seq_length=128,
             num_calibration_samples=64,
diff --git a/tests/llmcompressor/transformers/obcq/test_oneshot_with_modifier.py b/tests/llmcompressor/transformers/obcq/test_oneshot_with_modifier.py
index fb1ab9723..7fe63bb05 100644
--- a/tests/llmcompressor/transformers/obcq/test_oneshot_with_modifier.py
+++ b/tests/llmcompressor/transformers/obcq/test_oneshot_with_modifier.py
@@ -45,7 +45,6 @@ def test_oneshot_with_modifier_object(self):
             recipe=recipe_str,
             concatenate_data=concatenate_data,
             splits=splits,
-            oneshot_device=device,
         )

     def tearDown(self):

From 3022001c53dbdba09e2f8a2ca3dbcd03c2468389 Mon Sep 17 00:00:00 2001
From: Dipika Sikka
Date: Wed, 18 Jun 2025 16:26:56 +0000
Subject: [PATCH 2/4] Remove oneshot_device from oneshot API/args

---
 src/llmcompressor/args/model_arguments.py | 7 -------
 src/llmcompressor/entrypoints/oneshot.py  | 2 --
 2 files changed, 9 deletions(-)

diff --git a/src/llmcompressor/args/model_arguments.py b/src/llmcompressor/args/model_arguments.py
index ea3c3936a..e68bd16aa 100644
--- a/src/llmcompressor/args/model_arguments.py
+++ b/src/llmcompressor/args/model_arguments.py
@@ -80,13 +80,6 @@ class ModelArguments:
         default=True,
         metadata={"help": "Whether to compress sparse models during save"},
     )
-    oneshot_device: Optional[str] = field(
-        default="cuda",
-        metadata={
-            "help": "This argument is deprecated and nonfunctional "
-            "and will be removed in future release"
-        },
-    )
     model_revision: str = field(
         default="main",
         metadata={
diff --git a/src/llmcompressor/entrypoints/oneshot.py b/src/llmcompressor/entrypoints/oneshot.py
index 945c71943..707aafedf 100644
--- a/src/llmcompressor/entrypoints/oneshot.py
+++ b/src/llmcompressor/entrypoints/oneshot.py
@@ -208,7 +208,6 @@ def oneshot(
     tie_word_embeddings: bool = False,
     trust_remote_code_model: bool = False,
     save_compressed: bool = True,
-    oneshot_device: str = "cuda:0",
     model_revision: str = "main",
     # Recipe arguments
     recipe: Optional[Union[str, List[str]]] = None,
@@ -259,7 +258,6 @@ def oneshot(
     :param trust_remote_code_model: Whether to allow for custom models to execute
         their own modeling files.
     :param save_compressed: Whether to compress sparse models during save.
-    :param oneshot_device: Device to run oneshot calibration on.
     :param model_revision: The specific model version to use (can be branch name,
         tag, or commit id).
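
With oneshot_device gone, callers pick the device by how they load the model, not through the oneshot entrypoint. Below is a minimal sketch of the updated calling convention; the model stub, dataset, and recipe path are borrowed from the tests in this series, and device_map is the standard transformers/accelerate loading mechanism, not something this series adds:

    import torch
    from transformers import AutoModelForCausalLM

    from llmcompressor import oneshot

    # Device placement happens at model load time now; oneshot no longer
    # accepts an oneshot_device argument.
    model = AutoModelForCausalLM.from_pretrained(
        "nm-testing/llama2.c-stories15M",
        device_map="cuda:0" if torch.cuda.is_available() else "cpu",
    )

    # Run one-shot calibration on the already-placed model.
    oneshot(
        model=model,
        dataset="open_platypus",
        recipe="tests/llmcompressor/transformers/obcq/recipes/test_tiny2.yaml",
        splits={"calibration": "train[:10%]"},
        num_calibration_samples=64,
        max_seq_length=128,
    )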
From 504f700daae45fc804a160eaa1d2677517833ec9 Mon Sep 17 00:00:00 2001
From: Dipika Sikka
Date: Wed, 18 Jun 2025 16:30:21 +0000
Subject: [PATCH 3/4] Remove extra variable

---
 .../transformers/obcq/test_oneshot_with_modifier.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/llmcompressor/transformers/obcq/test_oneshot_with_modifier.py b/tests/llmcompressor/transformers/obcq/test_oneshot_with_modifier.py
index 7fe63bb05..62f05d7ad 100644
--- a/tests/llmcompressor/transformers/obcq/test_oneshot_with_modifier.py
+++ b/tests/llmcompressor/transformers/obcq/test_oneshot_with_modifier.py
@@ -31,7 +31,6 @@ def test_oneshot_with_modifier_object(self):
             SparseGPTModifier(sparsity=0.5, targets=[r"re:model.layers.\d+$"])
         ]

-        device = "cuda:0"
         concatenate_data = False
         num_calibration_samples = 64
         output_dir = self.output / "oneshot_out"

From 7b8747d0e660165185def549256c23592c82901a Mon Sep 17 00:00:00 2001
From: Kyle Sayers
Date: Thu, 19 Jun 2025 18:43:28 -0400
Subject: [PATCH 4/4] Remove oneshot_device arg from tests

Signed-off-by: Kyle Sayers
---
 .../transformers/obcq/test_obcq_sparsity.py |  1 -
 .../test_compress_tensor_utils.py           | 19 +++----------------
 2 files changed, 3 insertions(+), 17 deletions(-)

diff --git a/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py b/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py
index 5d95677d3..12df5e6c6 100644
--- a/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py
+++ b/tests/llmcompressor/transformers/obcq/test_obcq_sparsity.py
@@ -33,7 +33,6 @@ def test_sparsities(self):
         model = oneshot(
             model=self.model,
             dataset=self.dataset,
-            oneshot_device=self.device,
             recipe=self.recipe,
             max_seq_length=128,
             num_calibration_samples=64,
diff --git a/tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py b/tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py
index 8dd1a2cf5..140e706d1 100644
--- a/tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py
+++ b/tests/llmcompressor/transformers/sparsification/test_compress_tensor_utils.py
@@ -47,9 +47,6 @@ def test_sparse_model_reload(compressed, config, dtype, tmp_path):
     recipe_str = "tests/llmcompressor/transformers/obcq/recipes/test_tiny2.yaml"
     expected_sparsity = 0.5
     model_path = "nm-testing/llama2.c-stories15M"
-    device = "cuda:0"
-    if not torch.cuda.is_available():
-        device = "cpu"
     dataset = "open_platypus"
     concatenate_data = False
     num_calibration_samples = 64
@@ -66,7 +63,6 @@ def test_sparse_model_reload(compressed, config, dtype, tmp_path):
         recipe=recipe_str,
         concatenate_data=concatenate_data,
         splits=splits,
-        oneshot_device=device,
         precision=dtype,
         clear_sparse_session=False,
     )
@@ -166,9 +162,7 @@ def test_quant_model_reload(format, dtype, tmp_path):
         "tests/llmcompressor/transformers/compression/recipes/new_quant_simple.yaml"
     )
     model_path = "nm-testing/llama2.c-stories15M"
-    device = "cuda:0"
-    if not torch.cuda.is_available():
-        device = "cpu"
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
     dataset = "open_platypus"
     concatenate_data = False
     num_calibration_samples = 16
@@ -182,7 +176,6 @@ def test_quant_model_reload(format, dtype, tmp_path):
         recipe=recipe_str,
         concatenate_data=concatenate_data,
         splits=splits,
-        oneshot_device=device,
         precision=dtype,
         clear_sparse_session=False,
     )
@@ -362,9 +355,7 @@ def test_model_shared_tensors_gpu(
 def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tmp_path):
     from llmcompressor.pytorch.model_load.helpers import get_session_model

-    device = "cuda"
-    if not torch.cuda.is_available():
-        device = "cpu"
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
     dataset = "open_platypus"
     concatenate_data = False
     num_calibration_samples = 64
@@ -378,7 +369,6 @@ def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tm
         recipe=recipe,
         concatenate_data=concatenate_data,
         splits=splits,
-        oneshot_device=device,
         clear_sparse_session=False,
     )
@@ -446,9 +436,7 @@ def test_compressor_stacking(model_stub, recipe, sparse_format, quant_format, tm
     ],
 )
 def test_sparse_24_compressor_is_lossless(model_stub, recipe, sparse_format, tmp_path):
-    device = "cuda"
-    if not torch.cuda.is_available():
-        device = "cpu"
+    device = "cuda:0" if torch.cuda.is_available() else "cpu"
     dataset = "open_platypus"
     concatenate_data = False
     num_calibration_samples = 64
@@ -462,7 +450,6 @@ def test_sparse_24_compressor_is_lossless(model_stub, recipe, sparse_format, tmp
         recipe=recipe,
         concatenate_data=concatenate_data,
         splits=splits,
-        oneshot_device=device,
         clear_sparse_session=False,
     )
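
For reference, the one-line device selection introduced in patch 4 is meant to read as "GPU when available, otherwise CPU", matching the multi-line fallback it replaces. A standalone sketch using only the standard torch API:

    import torch

    # Equivalent to the removed block:
    #     device = "cuda"
    #     if not torch.cuda.is_available():
    #         device = "cpu"
    device = "cuda:0" if torch.cuda.is_available() else "cpu"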