@@ -465,48 +465,50 @@ DR_BENCHMARK(Stencil2D_Reference);
465
465
// Distributed vector of floats. Granularity ensures segments contain
466
466
// whole rows. Explicitly process segments SPMD-style with SYCL
467
467
//
468
- static void Stencil2D_SegmentedSYCL_DR (benchmark::State &state) {
469
- auto s = default_shape ();
470
- auto rows = s[0 ];
471
- auto cols = s[1 ];
472
-
473
- if (rows == 0 ) {
474
- return ;
475
- }
476
-
477
- auto dist = dr::mhp::distribution ().halo (cols).granularity (cols);
478
- dr::mhp::distributed_vector<T> a (rows * cols, init_val, dist);
479
- dr::mhp::distributed_vector<T> b (rows * cols, init_val, dist);
480
- Stats stats (state, sizeof (T) * a.size (), sizeof (T) * b.size ());
481
-
482
- // fails on devcloud
483
- // Checker checker;
484
- auto in =
485
- dr::mhp::local_segment (rng::subrange (a.begin () + cols, a.end () - cols));
486
- auto out =
487
- dr::mhp::local_segment (rng::subrange (b.begin () + cols, b.end () - cols));
488
- auto size = rng::size (in);
489
- assert (size % cols == 0 );
490
- auto row_slice = size / cols;
491
-
492
- auto q = dr::mhp::sycl_queue ();
493
- sycl::range global (row_slice, cols - 2 );
494
-
495
- for (auto _ : state) {
496
- for (std::size_t s = 0 ; s < stencil_steps; s++) {
497
- stats.rep ();
498
- auto op = [=](auto it) {
499
- stencil_1darray_op (in, out, cols, it[0 ], it[1 ] + 1 );
500
- };
501
- dr::mhp::halo (stencil_steps % 2 ? b : a).exchange ();
502
- q.parallel_for (sycl::range (row_slice, cols - 2 ), op).wait ();
503
- std::swap (in, out);
504
- }
505
- // fails on devcloud
506
- // checker.check(stencil_steps % 2 ? b : a);
507
- }
508
- }
509
-
510
- DR_BENCHMARK (Stencil2D_SegmentedSYCL_DR);
468
+ // static void Stencil2D_SegmentedSYCL_DR(benchmark::State &state) {
469
+ // auto s = default_shape();
470
+ // auto rows = s[0];
471
+ // auto cols = s[1];
472
+
473
+ // if (rows == 0) {
474
+ // return;
475
+ // }
476
+
477
+ // auto dist = dr::mhp::distribution().halo(cols).granularity(cols);
478
+ // dr::mhp::distributed_vector<T> a(rows * cols, init_val, dist);
479
+ // dr::mhp::distributed_vector<T> b(rows * cols, init_val, dist);
480
+ // Stats stats(state, sizeof(T) * a.size(), sizeof(T) * b.size());
481
+
482
+ // // fails on devcloud
483
+ // // Checker checker;
484
+ // auto in =
485
+ // dr::mhp::local_segment(rng::subrange(a.begin() + cols, a.end() -
486
+ // cols));
487
+ // auto out =
488
+ // dr::mhp::local_segment(rng::subrange(b.begin() + cols, b.end() -
489
+ // cols));
490
+ // auto size = rng::size(in);
491
+ // assert(size % cols == 0);
492
+ // auto row_slice = size / cols;
493
+
494
+ // auto q = dr::mhp::sycl_queue();
495
+ // sycl::range global(row_slice, cols - 2);
496
+
497
+ // for (auto _ : state) {
498
+ // for (std::size_t s = 0; s < stencil_steps; s++) {
499
+ // stats.rep();
500
+ // auto op = [=](auto it) {
501
+ // stencil_1darray_op(in, out, cols, it[0], it[1] + 1);
502
+ // };
503
+ // dr::mhp::halo(stencil_steps % 2 ? b : a).exchange();
504
+ // q.parallel_for(sycl::range(row_slice, cols - 2), op).wait();
505
+ // std::swap(in, out);
506
+ // }
507
+ // // fails on devcloud
508
+ // // checker.check(stencil_steps % 2 ? b : a);
509
+ // }
510
+ // }
511
+
512
+ // DR_BENCHMARK(Stencil2D_SegmentedSYCL_DR);
511
513
512
514
#endif // SYCL_LANGUAGE_VERSION
0 commit comments