@@ -740,6 +740,16 @@ class Wint2xMmaMultistage :
740
740
warp_k_compute_offset_B
741
741
);
742
742
#if 0
743
+ CUTLASS_TRACE_DEVICE(" pipe_state.warp_frag_B_=[%f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f]",
744
+ static_cast<float>(pipe_state.warp_frag_B_[0]), static_cast<float>(pipe_state.warp_frag_B_[1]),
745
+ static_cast<float>(pipe_state.warp_frag_B_[2]), static_cast<float>(pipe_state.warp_frag_B_[3]),
746
+ static_cast<float>(pipe_state.warp_frag_B_[4]), static_cast<float>(pipe_state.warp_frag_B_[5]),
747
+ static_cast<float>(pipe_state.warp_frag_B_[6]), static_cast<float>(pipe_state.warp_frag_B_[7]),
748
+ static_cast<float>(pipe_state.warp_frag_B_[8]), static_cast<float>(pipe_state.warp_frag_B_[9]),
749
+ static_cast<float>(pipe_state.warp_frag_B_[10]), static_cast<float>(pipe_state.warp_frag_B_[11]),
750
+ static_cast<float>(pipe_state.warp_frag_B_[12]), static_cast<float>(pipe_state.warp_frag_B_[13]),
751
+ static_cast<float>(pipe_state.warp_frag_B_[14]), static_cast<float>(pipe_state.warp_frag_B_[15]));
752
+
743
753
if (FragmentC::kElements == 16) {
744
754
CUTLASS_TRACE_DEVICE(" tile_C[0:15]=[%f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f]",
745
755
static_cast<float>(accum[0]), static_cast<float>(accum[1]),
@@ -751,7 +761,6 @@ class Wint2xMmaMultistage :
751
761
static_cast<float>(accum[12]), static_cast<float>(accum[13]),
752
762
static_cast<float>(accum[14]), static_cast<float>(accum[15]));
753
763
}
754
- #endif
755
764
756
765
// CUTLASS_TRACE_DEVICE_TID(" now1 warp_loaded_frag_A_[0:7]=[%f, %f, %f, %f, %f, %f, %f, %f]",
757
766
// static_cast<float>(pipe_state.warp_loaded_frag_A_[warp_mma_k % 2][0]), static_cast<float>(pipe_state.warp_loaded_frag_A_[warp_mma_k % 2][1]),
@@ -779,6 +788,7 @@ class Wint2xMmaMultistage :
779
788
// static_cast<float>(accum[10]), static_cast<float>(accum[11]),
780
789
// static_cast<float>(accum[12]), static_cast<float>(accum[13]),
781
790
// static_cast<float>(accum[14]), static_cast<float>(accum[15]));
791
+ #endif
782
792
}
783
793
784
794
// Except for the last warp-tile, all warp-tiles issue their share of
0 commit comments