@@ -20,7 +20,6 @@ __global__ void expandInputRowsKernel(
20
20
int expert_id = sorted_experts[expanded_dest_row];
21
21
22
22
extern __shared__ int64_t smem_expert_first_token_offset[];
23
- int64_t align_expanded_row_accumulate = 0 ;
24
23
if constexpr (ALIGN_BLOCK_SIZE) {
25
24
// load g2s
26
25
for (int idx = threadIdx.x ; idx < num_local_experts + 1 ;
@@ -63,7 +62,6 @@ __global__ void expandInputRowsKernel(
63
62
using DataElem = cutlass::Array<T, ELEM_PER_THREAD>;
64
63
65
64
// Duplicate and permute rows
66
- int64_t const source_k_rank = expanded_source_row / num_rows;
67
65
int64_t const source_row = expanded_source_row % num_rows;
68
66
69
67
auto const * source_row_ptr =
@@ -160,7 +158,6 @@ __global__ void finalizeMoeRoutingKernel(
160
158
elem_index += stride) {
161
159
ComputeElem thread_output;
162
160
thread_output.fill (0 );
163
- float row_rescale{0 .f };
164
161
for (int k_idx = 0 ; k_idx < k; ++k_idx) {
165
162
int64_t const expanded_original_row = original_row + k_idx * num_rows;
166
163
int64_t const expanded_permuted_row =
@@ -177,8 +174,6 @@ __global__ void finalizeMoeRoutingKernel(
177
174
auto const * expanded_permuted_rows_row_ptr =
178
175
expanded_permuted_rows_v + expanded_permuted_row * num_elems_in_col;
179
176
180
- int64_t const expert_idx = expert_for_source_row[k_offset];
181
-
182
177
ComputeElem expert_result = arrayConvert<InputElem, ComputeElem>(
183
178
expanded_permuted_rows_row_ptr[elem_index]);
184
179
thread_output = thread_output + row_scale * (expert_result);
0 commit comments