Skip to content

Commit 709ab81

Browse files
committed
clean moe
1 parent bd6e2b8 commit 709ab81

File tree

1 file changed

+1
-3
lines changed
  • torchtitan/models/deepseek-v3/model/moe.py

1 file changed

+1
-3
lines changed

torchtitan/models/deepseek-v3/model/moe.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,11 @@ def __init__(
114114
self.num_experts = num_experts
115115
self.top_k = top_k
116116
self.use_sigmoid = use_sigmoid
117-
self.route_sclaing_factor
117+
self.route_sclaing_factor = route_sclaing_factor
118118

119119
self.weight = nn.Parameter(
120120
torch.empty((self.n_routed_experts, self.gating_dim))
121121
)
122-
# TODO: is this needed? This is not "Complementary Sequence-Wise Auxiliary Loss"
123-
# self.e_score_correction_bias = nn.Parameter(torch.rand((self.num_experts)))
124122

125123
def forward(
126124
self, x: torch.Tensor, expert_bias: torch.Tensor = None

0 commit comments

Comments (0)