Skip to content

Commit da648e8

Browse files
committed
Enable async copy for B.
Change-Id: Ia3ac37ad162a8cf3ccce4f268e81bd06c8ac3c46
1 parent feac956 commit da648e8

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

custom_ops/gpu_ops/cutlass_extensions/gemm/threadblock/wint2x_mma_multistage.h

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -475,7 +475,7 @@ class Wint2xMmaMultistage :
475475
copy_tiles_and_advance_per_stage_A(iterator_A);
476476

477477
// Async copy zipped B to shared memory.
478-
copy_tiles_and_advance_per_stage_B<false, true>(iterator_B);
478+
copy_tiles_and_advance_per_stage_B<true, true>(iterator_B);
479479

480480
// TODO: Async copy other quantized params to shared memory, local_scale, code_scale, code_zp, super_scale.
481481
//tile_dequanter_B.Load(smem_zipped_ptr_B_ + (stage % Base::kStages) * smem_zipped_bytes_per_stage_B_,
@@ -609,7 +609,7 @@ class Wint2xMmaMultistage :
609609
int group_start_iteration_B = warp_mma_k * Detail::kAccessesPerGroupB;
610610

611611
copy_tiles_and_advance_A(iterator_A, group_start_iteration_A);
612-
copy_tiles_and_advance_B<false>(iterator_B, group_start_iteration_B);
612+
copy_tiles_and_advance_B<true>(iterator_B, group_start_iteration_B);
613613
}
614614

615615
// The second-to-last warp-tile also:
@@ -621,7 +621,7 @@ class Wint2xMmaMultistage :
621621
int group_start_iteration_B = (warp_mma_k + 1) * Detail::kAccessesPerGroupB;
622622

623623
copy_tiles_and_advance_A(iterator_A, group_start_iteration_A);
624-
copy_tiles_and_advance_B<false>(iterator_B, group_start_iteration_B);
624+
copy_tiles_and_advance_B<true>(iterator_B, group_start_iteration_B);
625625

626626
// Inserts a memory fence between stages of cp.async instructions.
627627
cutlass::arch::cp_async_fence();

0 commit comments

Comments
 (0)