@@ -1,21 +1,28 @@
+import copy
+
 import pytest
 import torch
 import transformers
 
 from neural_compressor.torch.algorithms.weight_only.autoround import AutoRoundQuantizer, get_autoround_default_run_fn
-from neural_compressor.torch.quantization import AutoRoundConfig, quantize
+from neural_compressor.torch.quantization import (
+    AutoRoundConfig,
+    convert,
+    get_default_AutoRound_config,
+    prepare,
+    quantize,
+)
 from neural_compressor.torch.utils import logger
 
 try:
     import auto_round
 
     auto_round_installed = True
 except ImportError:
     auto_round_installed = False
 
 
-@pytest.fixture(scope="module")
-def gpt_j():
+def get_gpt_j():
     tiny_gptj = transformers.AutoModelForCausalLM.from_pretrained(
         "hf-internal-testing/tiny-random-GPTJForCausalLM",
         torchscript=True,
@@ -25,17 +32,15 @@ def gpt_j():
 
 @pytest.mark.skipif(not auto_round_installed, reason="auto_round module is not installed")
 class TestAutoRound:
-    @staticmethod
-    @pytest.fixture(scope="class", autouse=True)
-    def gpt_j_model(gpt_j):
-        yield gpt_j
+    def setup_class(self):
+        self.gptj = get_gpt_j()
 
     def setup_method(self, method):
         logger.info(f"Running TestAutoRound test: {method.__name__}")
 
-    def test_autoround(self, gpt_j_model):
+    def test_autoround(self):
         inp = torch.ones([1, 10], dtype=torch.long)
-
+        gpt_j_model = copy.deepcopy(self.gptj)
         tokenizer = transformers.AutoTokenizer.from_pretrained(
             "hf-internal-testing/tiny-random-GPTJForCausalLM", trust_remote_code=True
         )
@@ -73,9 +78,9 @@ def test_autoround(self, gpt_j_model):
         assert "scale" in q_model.autoround_config["transformer.h.0.attn.k_proj"].keys()
         assert torch.float32 == q_model.autoround_config["transformer.h.0.attn.k_proj"]["scale_dtype"]
 
-    def test_new_api(self, gpt_j_model):
+    def test_quantizer(self):
         inp = torch.ones([1, 10], dtype=torch.long)
-
+        gpt_j_model = copy.deepcopy(self.gptj)
         tokenizer = transformers.AutoTokenizer.from_pretrained(
             "hf-internal-testing/tiny-random-GPTJForCausalLM", trust_remote_code=True
         )
@@ -110,3 +115,34 @@ def test_new_api(self, gpt_j_model):
         assert "transformer.h.0.attn.k_proj" in q_model.autoround_config.keys()
         assert "scale" in q_model.autoround_config["transformer.h.0.attn.k_proj"].keys()
         assert torch.float32 == q_model.autoround_config["transformer.h.0.attn.k_proj"]["scale_dtype"]
+
+    def test_prepare_and_convert_api(self):
+        inp = torch.ones([1, 10], dtype=torch.long)
+        gpt_j_model = copy.deepcopy(self.gptj)
+        tokenizer = transformers.AutoTokenizer.from_pretrained(
+            "hf-internal-testing/tiny-random-GPTJForCausalLM", trust_remote_code=True
+        )
+
+        out1 = gpt_j_model(inp)
+        quant_config = get_default_AutoRound_config()
+        logger.info(f"Test AutoRound with config {quant_config}")
+
+        run_fn = get_autoround_default_run_fn
+        run_args = (
+            tokenizer,
+            "NeelNanda/pile-10k",
+            20,
+            10,
+        )
+        fp32_model = gpt_j_model
+
+        # quantizer execution: prepare -> calibrate with run_fn -> convert
+        model = prepare(model=fp32_model, quant_config=quant_config)
+        run_fn(model, *run_args)
+        q_model = convert(model)
+
+        out2 = q_model(inp)
+        assert torch.allclose(out1[0], out2[0], atol=1e-1)
+        assert "transformer.h.0.attn.k_proj" in q_model.autoround_config.keys()
+        assert "scale" in q_model.autoround_config["transformer.h.0.attn.k_proj"].keys()
+        assert torch.float32 == q_model.autoround_config["transformer.h.0.attn.k_proj"]["scale_dtype"]
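
For reference, here is a minimal standalone sketch of the prepare/convert flow that the new test_prepare_and_convert_api exercises. It is not part of the PR; all names, checkpoints, and argument values are copied from the diff above, and the exact signatures may differ outside this revision of neural_compressor.

import torch
import transformers

from neural_compressor.torch.algorithms.weight_only.autoround import get_autoround_default_run_fn
from neural_compressor.torch.quantization import convert, get_default_AutoRound_config, prepare

# Tiny GPT-J checkpoint used by the tests above.
fp32_model = transformers.AutoModelForCausalLM.from_pretrained(
    "hf-internal-testing/tiny-random-GPTJForCausalLM", torchscript=True
)
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "hf-internal-testing/tiny-random-GPTJForCausalLM", trust_remote_code=True
)

quant_config = get_default_AutoRound_config()

# prepare() wraps the FP32 model for calibration; the default run_fn then
# feeds it samples from NeelNanda/pile-10k (same arguments as the test),
# and convert() finalizes the AutoRound weight-only quantization.
model = prepare(model=fp32_model, quant_config=quant_config)
get_autoround_default_run_fn(model, tokenizer, "NeelNanda/pile-10k", 20, 10)
q_model = convert(model)

out = q_model(torch.ones([1, 10], dtype=torch.long))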