5
5
#include < memory>
6
6
#include < oneapi/mkl.hpp>
7
7
8
- // This is a workaround to flush MKL submissions into the Level Zero queue, using
9
- // unspecified but guaranteed behavior of the Intel SYCL runtime. Once the SYCL standard
10
- // committee approves sycl::queue::flush(), we will change the macro to use that.
11
- #define __FORCE_MKL_FLUSH__ (cmd ) \
12
- sycl::get_native<sycl::backend::ext_oneapi_level_zero>(cmd)
13
-
14
8
oneapi::mkl::transpose convert (onemklTranspose val) {
15
9
switch (val) {
16
10
case ONEMKL_TRANSPOSE_NONTRANS:
@@ -392,7 +386,6 @@ extern "C" int onemklHgemm_batch(syclQueue_t device_queue, onemklTranspose trans
392
386
reinterpret_cast <const sycl::half **>(&b[0 ]), ldb,
393
387
reinterpret_cast <sycl::half *>(beta), reinterpret_cast <sycl::half **>(&c[0 ]),
394
388
ldc, group_count, group_size, {});
395
- __FORCE_MKL_FLUSH__ (status);
396
389
return 0 ;
397
390
}
398
391
@@ -410,7 +403,6 @@ extern "C" int onemklSgemm_batch(syclQueue_t device_queue, onemklTranspose trans
410
403
(const float **)&b[0 ], ldb,
411
404
beta, &c[0 ], ldc,
412
405
group_count, group_size, {});
413
- __FORCE_MKL_FLUSH__ (status);
414
406
return 0 ;
415
407
}
416
408
@@ -428,7 +420,6 @@ extern "C" int onemklDgemm_batch(syclQueue_t device_queue, onemklTranspose trans
428
420
(const double **)&b[0 ], ldb,
429
421
beta, &c[0 ], ldc,
430
422
group_count, group_size, {});
431
- __FORCE_MKL_FLUSH__ (status);
432
423
return 0 ;
433
424
}
434
425
@@ -450,7 +441,6 @@ extern "C" int onemklCgemm_batch(syclQueue_t device_queue, onemklTranspose trans
450
441
reinterpret_cast <std::complex<float > *>(beta),
451
442
reinterpret_cast <std::complex<float > **>(&c[0 ]), ldc,
452
443
group_count, group_size, {});
453
- __FORCE_MKL_FLUSH__ (status);
454
444
return 0 ;
455
445
}
456
446
@@ -473,7 +463,6 @@ extern "C" int onemklZgemm_batch(syclQueue_t device_queue, onemklTranspose trans
473
463
reinterpret_cast <std::complex<double > *>(beta),
474
464
reinterpret_cast <std::complex<double > **>(&c[0 ]), ldc,
475
465
group_count, group_size, {});
476
- __FORCE_MKL_FLUSH__ (status);
477
466
return 0 ;
478
467
}
479
468
@@ -490,7 +479,6 @@ extern "C" int onemklStrsm_batch(syclQueue_t device_queue, onemklSide left_right
490
479
&trsmInfo.m_transa [0 ], &trsmInfo.m_unitdiag [0 ],
491
480
m, n, alpha, (const float **)&a[0 ], lda,
492
481
&b[0 ], ldb, group_count, group_size, {});
493
- __FORCE_MKL_FLUSH__ (status);
494
482
return 0 ;
495
483
}
496
484
@@ -508,7 +496,6 @@ extern "C" int onemklDtrsm_batch(syclQueue_t device_queue, onemklSide left_right
508
496
&trsmInfo.m_transa [0 ], &trsmInfo.m_unitdiag [0 ],
509
497
m, n, alpha, (const double **)&a[0 ], lda, &b[0 ],
510
498
ldb, group_count, group_size, {});
511
- __FORCE_MKL_FLUSH__ (status);
512
499
return 0 ;
513
500
}
514
501
@@ -528,7 +515,6 @@ extern "C" int onemklCtrsm_batch(syclQueue_t device_queue, onemklSide left_right
528
515
reinterpret_cast <const std::complex<float > **>(&a[0 ]),
529
516
lda, reinterpret_cast <std::complex<float > **>(&b[0 ]),
530
517
ldb, group_count, group_size, {});
531
- __FORCE_MKL_FLUSH__ (status);
532
518
return 0 ;
533
519
}
534
520
@@ -548,6 +534,5 @@ extern "C" int onemklZtrsm_batch(syclQueue_t device_queue, onemklSide left_right
548
534
reinterpret_cast <const std::complex<double > **>(&a[0 ]),
549
535
lda, reinterpret_cast <std::complex<double > **>(&b[0 ]),
550
536
ldb, group_count, group_size, {});
551
- __FORCE_MKL_FLUSH__ (status);
552
537
return 0 ;
553
538
}
0 commit comments