PR comments

dsikka · dsikka · commit ea930897fd9f · 2025-07-14T20:24:17.000Z
diff --git a/src/llmcompressor/modeling/prepare.py b/src/llmcompressor/modeling/prepare.py
@@ -29,7 +29,7 @@ def replace_modules_for_calibration(model: PreTrainedModel) -> PreTrainedModel:
 
 
 def update_qwen3_moe(model, stack):
-    for _, module in model.named_modules():
+    for module in model.modules():
         cls_name = module.__class__.__name__
         if cls_name == "Qwen3MoeDecoderLayer":
             stack.enter_context(
@@ -38,7 +38,7 @@ def update_qwen3_moe(model, stack):
 
 
 def update_deepseek3_moe(model, stack):
-    for _, module in model.named_modules():
+    for module in model.modules():
         cls_name = module.__class__.__name__
         if (
             cls_name == "DeepseekV3DecoderLayer"
@@ -55,6 +55,8 @@ def update_deepseek3_moe(model, stack):
 }
 
 def moe_calibration_context(model: PreTrainedModel, stack):
+    # Temporarily updates the MoE modules within the context
+    # Once the context exits, parameter updates persist
     cls_name = model.__class__.__name__
     if cls_name in moe_context:
         moe_context.get(cls_name)(model, stack)
diff --git a/src/llmcompressor/modeling/qwen3_moe.py b/src/llmcompressor/modeling/qwen3_moe.py
@@ -1,3 +1,19 @@
+# coding=utf-8
+# Copyright 2025 The Qwen team, Alibaba Group and the HuggingFace Inc. team.
+# All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import torch