File tree Expand file tree Collapse file tree 1 file changed +5
-4
lines changed
src/llmcompressor/modifiers/quantization Expand file tree Collapse file tree 1 file changed +5
-4
lines changed Original file line number Diff line number Diff line change @@ -97,10 +97,11 @@ def update(
97
97
# reshape for per channel scenario
98
98
num_heads = key_states .shape [1 ]
99
99
head_dim = key_states .shape [- 1 ]
100
- # from [batch_size, num_heads, seq_len - residual_length, head_dim]
101
- # to [batch_size, seq_len - residual_length, num_heads * head_dim]
102
- key_states = key_states .transpose (1 , 2 ).flatten (2 )
103
- value_states = value_states .transpose (1 , 2 ).flatten (2 )
100
+ if self .quantization_args .strategy == QuantizationStrategy .CHANNEL :
101
+ # from [batch_size, num_heads, seq_len - residual_length, head_dim]
102
+ # to [batch_size, seq_len - residual_length, num_heads * head_dim]
103
+ key_states = key_states .transpose (1 , 2 ).flatten (2 )
104
+ value_states = value_states .transpose (1 , 2 ).flatten (2 )
104
105
105
106
q_key_states = self ._quantize (
106
107
key_states .contiguous (), KVCacheScaleType .KEY , layer_idx
You can’t perform that action at this time.
0 commit comments