Skip to content

Commit e850a7c

Browse files
authored
Merge branch 'main' into provide_moe_calibration_mode
2 parents f8c0b98 + b457898 commit e850a7c

File tree

23 files changed

+164
-148
lines changed

23 files changed

+164
-148
lines changed
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
name: prepare code coverage
2+
description: installs code coverage dependencies and exports an updated 'PYTEST_ADDOPTS' env var
3+
4+
runs:
5+
using: composite
6+
steps:
7+
- run: |-
8+
# install dependencies
9+
pip3 install coverage pytest-cov https://github.com/neuralmagic/pytest-nm-releng/archive/v0.4.0.tar.gz
10+
11+
# generate and source flags
12+
FLAGS_FILE="coverage_flags.sh"
13+
nmre-generate-coverage-flags --package "llmcompressor" --output-file "$FLAGS_FILE"
14+
source "$FLAGS_FILE"
15+
rm "$FLAGS_FILE"
16+
17+
# export defined/updated 'PYTEST_ADDOPTS' env var
18+
echo "PYTEST_ADDOPTS=$PYTEST_ADDOPTS" | tee -a "$GITHUB_ENV"
19+
shell: bash

.github/workflows/test-check-transformers.yaml

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
11
name: Test Checks (Transformers)
22
on:
33
pull_request:
4-
branches: main
4+
branches: [ main ]
55
types: [ labeled, synchronize ]
66
push:
7-
branches: main
7+
branches: [ main ]
8+
workflow_dispatch:
9+
inputs:
10+
code_coverage:
11+
description: if enabled, code coverage metrics will be collected during the test run
12+
type: boolean
13+
default: false
814

915
env:
1016
CADENCE: "commit"
@@ -72,6 +78,9 @@ jobs:
7278
BUILD_TYPE=nightly pip3 install .
7379
- name: "Clean compressed-tensors directory"
7480
run: rm -r compressed-tensors/
81+
- name: "⚙️ Prepare code coverage"
82+
if: inputs.code_coverage
83+
uses: ./.github/actions/prepare-code-coverage
7584
- name: "🔬 Running transformers tests"
7685
if: (success() || failure()) && steps.install.outcome == 'success'
7786
run: |
@@ -104,3 +113,13 @@ jobs:
104113
if: (success() || failure()) && steps.install.outcome == 'success'
105114
run: |
106115
pytest -v tests/llmcompressor/transformers/kv_cache
116+
- name: "Upload coverage report"
117+
if: (success() || failure()) && inputs.code_coverage
118+
uses: actions/upload-artifact@v4
119+
with:
120+
name: transformers-tests-coverage-results
121+
path: |
122+
.coverage
123+
coverage-html
124+
coverage.json
125+
retention-days: 5

.github/workflows/test-check.yaml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@ on:
44
branches:
55
- main
66
push:
7+
workflow_dispatch:
8+
inputs:
9+
code_coverage:
10+
description: if enabled, code coverage metrics will be collected during the test run
11+
type: boolean
12+
default: false
713

814
env:
915
CADENCE: "commit"
@@ -36,8 +42,21 @@ jobs:
3642
BUILD_TYPE=nightly pip3 install .
3743
- name: "Clean compressed-tensors directory"
3844
run: rm -r compressed-tensors/
45+
- name: "⚙️ Prepare code coverage"
46+
if: inputs.code_coverage
47+
uses: ./.github/actions/prepare-code-coverage
3948
- name: "🔬 Running base tests"
4049
run: make test
50+
- name: "Upload coverage report"
51+
if: (success() || failure()) && inputs.code_coverage
52+
uses: actions/upload-artifact@v4
53+
with:
54+
name: base-tests-coverage-results
55+
path: |
56+
.coverage
57+
coverage-html
58+
coverage.json
59+
retention-days: 5
4160

4261
pytorch-tests:
4362
runs-on: ubuntu-22.04
@@ -65,9 +84,23 @@ jobs:
6584
BUILD_TYPE=nightly pip3 install .
6685
- name: "Clean compressed-tensors directory"
6786
run: rm -r compressed-tensors/
87+
- name: "⚙️ Prepare code coverage"
88+
if: inputs.code_coverage
89+
uses: ./.github/actions/prepare-code-coverage
6890
- name: "🔬 Running pytorch tests"
6991
run: |
7092
pytest -v tests/llmcompressor/pytorch
93+
- name: "Upload coverage report"
94+
if: (success() || failure()) && inputs.code_coverage
95+
uses: actions/upload-artifact@v4
96+
with:
97+
name: pytorch-tests-coverage-results
98+
path: |
99+
.coverage
100+
coverage-html
101+
coverage.json
102+
retention-days: 5
103+
71104

72105
compat-pytorch-1_9-pytorch-tests:
73106
runs-on: ubuntu-22.04
@@ -95,6 +128,19 @@ jobs:
95128
BUILD_TYPE=nightly pip3 install .
96129
- name: "Clean compressed-tensors directory"
97130
run: rm -r compressed-tensors/
131+
- name: "⚙️ Prepare code coverage"
132+
if: inputs.code_coverage
133+
uses: ./.github/actions/prepare-code-coverage
98134
- name: "🔬 Running pytorch tests"
99135
run: |
100136
pytest -v tests/llmcompressor/pytorch
137+
- name: "Upload coverage report"
138+
if: (success() || failure()) && inputs.code_coverage
139+
uses: actions/upload-artifact@v4
140+
with:
141+
name: compat-pytorch-tests-coverage-results
142+
path: |
143+
.coverage
144+
coverage-html
145+
coverage.json
146+
retention-days: 5

examples/quantization_2of4_sparse_w4a16/llama7b_sparse_w4a16.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# NOTE: Fine tuning can require more steps than are shown in this example
2+
# See the Axolotl integration blog post for best fine tuning practices
3+
# https://developers.redhat.com/articles/2025/06/17/axolotl-meets-llm-compressor-fast-sparse-open
4+
15
from pathlib import Path
26

37
import torch
@@ -74,6 +78,7 @@
7478
)
7579

7680
# Sparse finetune
81+
# This step can be supplanted by fine tuning via integrated FT libraries such as Axolotl
7782
train(
7883
model=(output_path / "sparsity_stage"),
7984
**oneshot_kwargs,

examples/trl_mixin/ex_trl_constant.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# NOTE: Fine tuning can require more steps than are shown in this example
2+
# See the Axolotl integration blog post for best fine tuning practices
3+
# https://developers.redhat.com/articles/2025/06/17/axolotl-meets-llm-compressor-fast-sparse-open
4+
15
from datasets import load_dataset
26
from sft_trainer import SFTTrainer
37
from transformers import AutoModelForCausalLM, AutoTokenizer
@@ -46,6 +50,7 @@ def formatting_prompts_func(example):
4650
)
4751
model_args = ModelArguments(model=model)
4852

53+
# This step can be supplanted by fine tuning via integrated FT libraries such as Axolotl
4954
trainer = SFTTrainer(
5055
model=model,
5156
processing_class=tokenizer,

examples/trl_mixin/ex_trl_distillation.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# NOTE: Fine tuning can require more steps than are shown in this example
2+
# See the Axolotl integration blog post for best fine tuning practices
3+
# https://developers.redhat.com/articles/2025/06/17/axolotl-meets-llm-compressor-fast-sparse-open
4+
15
from sft_trainer import SFTTrainer
26
from transformers import AutoModelForCausalLM, AutoTokenizer
37

@@ -60,6 +64,7 @@
6064
)
6165
model_args = ModelArguments(model=model, distill_teacher=teacher)
6266

67+
# This step can be supplanted by fine tuning via integrated FT libraries such as Axolotl
6368
trainer = SFTTrainer(
6469
model=model,
6570
teacher=teacher,

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ def localversion_func(version: ScmVersion) -> str:
146146
"torchvision",
147147
"librosa",
148148
"soundfile",
149+
"torchcodec",
149150
# linting, formatting, and type checking
150151
"black~=24.4.2",
151152
"isort~=5.13.2",

src/llmcompressor/args/dataset_arguments.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -172,15 +172,6 @@ class DatasetArguments(CustomDatasetArguments):
172172
),
173173
},
174174
)
175-
trust_remote_code_data: bool = field(
176-
default=False,
177-
metadata={
178-
"help": "Whether or not to allow for datasets defined on the Hub using "
179-
"a dataset script. This option should only be set to True for "
180-
"repositories you trust and in which you have read the code, as it "
181-
"will execute code present on the Hub on your local machine."
182-
},
183-
)
184175
# --- pipeline arguments --- #
185176
pipeline: Optional[str] = field(
186177
default="independent",

src/llmcompressor/entrypoints/oneshot.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,6 @@ def oneshot(
231231
overwrite_cache: bool = False,
232232
preprocessing_num_workers: Optional[int] = None,
233233
min_tokens_per_module: Optional[float] = None,
234-
trust_remote_code_data: bool = False,
235234
calibrate_moe_context: bool = False,
236235
# Miscellaneous arguments
237236
output_dir: Optional[str] = None,
@@ -294,8 +293,6 @@ def oneshot(
294293
preprocessing.
295294
:param min_tokens_per_module: Minimum percentage of tokens per
296295
module, relevant for MoE models.
297-
:param trust_remote_code_data: Whether to allow for datasets defined on the Hub
298-
using a dataset script.
299296
300297
# Miscellaneous arguments
301298
:param output_dir: Path to save the output model after calibration.

src/llmcompressor/observers/base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import Any, Iterable, Optional, Tuple, Union
33

44
import torch
5+
from compressed_tensors import InternalModule
56
from compressed_tensors.quantization.quant_args import (
67
FP8_E4M3_DATA,
78
QuantizationArgs,
@@ -12,12 +13,11 @@
1213
from compressed_tensors.utils import safe_permute
1314
from loguru import logger
1415
from torch import FloatTensor, IntTensor, Tensor
15-
from torch.nn import Module
1616

1717
__all__ = ["Observer"]
1818

1919

20-
class Observer(Module, RegistryMixin):
20+
class Observer(InternalModule, RegistryMixin):
2121
"""
2222
Base Observer class to be subclassed for specific implementation.
2323
Subclasses should override `calculate_qparams` to return a scale, zero_point

0 commit comments

Comments
 (0)