Skip to content

Commit 3fbf101

Browse files
authored
[SYCL][Joint Matrix] Implement bfloat16 32x64x32 combination support (#13590)
1 parent 964f963 commit 3fbf101

6 files changed

+24
-3
lines changed

sycl/test-e2e/Matrix/Inputs/element_wise_all_ops_impl.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,11 @@ int main() {
229229
test_ewops_c<float, 16, 16>();
230230
// This combination is not currently supported for sub group size = 32 in IGC
231231
#if (!defined(SG_SZ) || SG_SZ != 32)
232+
test_ewops_ab<bfloat16, 1, 32, use::a, layout::row_major, 1>();
232233
test_ewops_ab<bfloat16, 32, 16, use::a, layout::row_major, 1>();
234+
test_ewops_ab<bfloat16, 32, 32, use::a, layout::row_major, 1>();
233235
test_ewops_ab<bfloat16, 16, 64, use::b, layout::ext_intel_packed, 2>();
236+
test_ewops_ab<bfloat16, 32, 64, use::b, layout::ext_intel_packed, 2>();
234237
test_ewops_c<float, 1, 64>();
235238
test_ewops_c<float, 32, 64>();
236239
#endif

sycl/test-e2e/Matrix/Inputs/element_wise_ops_impl.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,9 @@ int main() {
133133
// IGC
134134
passed &= test<bfloat16, float, 16, 16, 16, 2, class pvc_bf16_16x16x16>();
135135
passed &= test<bfloat16, float, 1, 64, 16, 2, class pvc_bf16_1x64x16>();
136+
passed &= test<bfloat16, float, 1, 64, 32, 2, class pvc_bf16_1x64x32>();
136137
passed &= test<bfloat16, float, 32, 64, 16, 2, class pvc_bf16_32x64x16>();
138+
passed &= test<bfloat16, float, 32, 64, 32, 2, class pvc_bf16_32x64x32>();
137139
#endif
138140
break;
139141
}

sycl/test-e2e/Matrix/Inputs/joint_matrix_bf16_fill_k_cache_impl.hpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,17 @@ int main(
483483
MCache1, NCache1, KCache1, MCache2, NCache2, KCache2>(matrix_size);
484484
test<bfloat16, float, VnniFactor, /*TM*/ 32, /*TN*/ 64, /*TK*/ 16,
485485
MCache1, NCache1, KCache1, MCache2, NCache2, KCache2>(matrix_size);
486-
#endif
486+
// `#ifndef PREFETCH` is a workaround for GSD-10535.
487+
#ifndef PREFETCH
488+
// The test is commented out due to flaky results: GSD-10537.
489+
// test<bfloat16, float, VnniFactor, /*TM*/ 1, /*TN*/ 64, /*TK*/ 32,
490+
// MCache1,
491+
// NCache1, /*KCache1*/ 32, MCache2, NCache2, KCache2>(matrix_size);
492+
#endif // PREFETCH
493+
test<bfloat16, float, VnniFactor, /*TM*/ 32, /*TN*/ 64, /*TK*/ 32,
494+
MCache1, NCache1, /*KCache1*/ 32, MCache2, NCache2, KCache2>(
495+
matrix_size);
496+
#endif // (!defined(SG_SZ) || SG_SZ != 32)
487497
break;
488498
}
489499

sycl/test-e2e/Matrix/Inputs/joint_matrix_rowmajorA_rowmajorB_impl.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,8 +127,12 @@ int main() {
127127
bfloat16, float>();
128128
res += gemm_row_major<1, 64, 16, class bf16_1x64x16, bfloat16, bfloat16,
129129
float>();
130+
res += gemm_row_major<1, 64, 32, class bf16_1x64x32, bfloat16, bfloat16,
131+
float>();
130132
res += gemm_row_major<32, 64, 16, class bf16_32x64x16, bfloat16,
131133
bfloat16, float>();
134+
res += gemm_row_major<32, 64, 32, class bf16_32x64x32, bfloat16,
135+
bfloat16, float>();
132136
}
133137
break;
134138
}

sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@
1111

1212
// REQUIRES: aspect-ext_intel_matrix
1313

14-
// RUN: %{build} -mllvm -inline-threshold=2000 %fp-model-precise -o %t.out -DMANUAL_UNROLL -DVNNI
14+
// RUN: %{build} -mllvm -inline-threshold=5000 %fp-model-precise -o %t.out -DMANUAL_UNROLL -DVNNI
1515
// RUN: %{run} %t.out
1616

1717
// -mllvm -inline-threshold=2000 added as a workaround,
1818
// since IGC doesn't support some variants of IR for Joint Matrix currently
19+
// -inline-threshold increased to 5000 to work around a bug in IGC: GSD-10534
1920
// -ffp-model=precise is added to not depend on compiler defaults.
2021

2122
#include "common.hpp"

sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_unroll_init.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,12 @@
1111

1212
// REQUIRES: aspect-ext_intel_matrix, gpu
1313

14-
// RUN: %{build} -mllvm -inline-threshold=2000 %fp-model-precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL -DVNNI
14+
// RUN: %{build} -mllvm -inline-threshold=5000 %fp-model-precise -o %t_gpu.out -DINIT_LIST -DMANUAL_UNROLL -DVNNI
1515
// RUN: %{run} %t_gpu.out
1616

1717
// -mllvm -inline-threshold=2000 added as a workaround,
1818
// since IGC doesn't support some variants of IR for Joint Matrix currently
19+
// -inline-threshold increased to 5000 to work around a bug in IGC: GSD-10534
1920
// -ffp-model=precise is added to not depend on compiler defaults.
2021

2122
#include "common.hpp"

0 commit comments

Comments
 (0)