We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 906e05d · commit 8aeaa91 · Copy full SHA for 8aeaa91
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -368,6 +368,7 @@ def __init__(
368
"weights")
369
self.input_quant = self.quant_config.target_scheme_map["Linear"].get(
370
"input_activations")
371
+ self.topk_indices_dtype = None
372
373
per_tensor = (self.weight_quant.strategy == QuantizationStrategy.TENSOR
374
and self.input_quant.strategy
@@ -738,6 +739,7 @@ def __init__(
738
739
740
from vllm.model_executor.layers.fused_moe.cutlass_moe import (
741
cutlass_moe_fp8)
742
743
self.fused_experts = cutlass_moe_fp8 # type: ignore
744
self.disable_expert_map = False
745
0 commit comments