From 1c046c6c931fbac59efa42210203a5ade57e5462 Mon Sep 17 00:00:00 2001 From: Peter Munch Date: Fri, 2 Feb 2024 14:34:20 +0100 Subject: [PATCH 1/6] Add deal.II example (with Kokkos) --- examples/deal.II/CMakeLists.txt | 22 +- examples/deal.II/README.MD | 2 +- examples/deal.II/bps.h | 196 +---------- examples/deal.II/{bps.cc => bps_01.cc} | 197 ++++++++++- examples/deal.II/bps_02.cc | 465 +++++++++++++++++++++++++ 5 files changed, 679 insertions(+), 203 deletions(-) rename examples/deal.II/{bps.cc => bps_01.cc} (57%) create mode 100644 examples/deal.II/bps_02.cc diff --git a/examples/deal.II/CMakeLists.txt b/examples/deal.II/CMakeLists.txt index 272facfc00..d7671eba8a 100644 --- a/examples/deal.II/CMakeLists.txt +++ b/examples/deal.II/CMakeLists.txt @@ -11,13 +11,21 @@ IF(NOT ${deal.II_FOUND}) ) ENDIF() -DEAL_II_INITIALIZE_CACHED_VARIABLES() -PROJECT("bps") +FILE(GLOB SOURCE_FILES "*.cc") -DEAL_II_INITIALIZE_CACHED_VARIABLES() +FOREACH ( source_file ${SOURCE_FILES} ) + GET_FILENAME_COMPONENT(file_name ${source_file} NAME) + STRING( REPLACE ".cc" "" exec ${file_name} ) -ADD_EXECUTABLE(bps bps.cc) -DEAL_II_SETUP_TARGET(bps) + DEAL_II_INITIALIZE_CACHED_VARIABLES() + PROJECT(${exec}) -TARGET_INCLUDE_DIRECTORIES(bps PUBLIC ${CEED_DIR}/include) -TARGET_LINK_LIBRARIES(bps ${CEED_DIR}/lib/libceed.so) + DEAL_II_INITIALIZE_CACHED_VARIABLES() + + ADD_EXECUTABLE(${exec} ${source_file}) + DEAL_II_SETUP_TARGET(${exec}) + + TARGET_INCLUDE_DIRECTORIES(${exec} PUBLIC ${CEED_DIR}/include) + TARGET_LINK_LIBRARIES(${exec} ${CEED_DIR}/lib/libceed.so) + +ENDFOREACH ( source_file ${SOURCE_FILES} ) diff --git a/examples/deal.II/README.MD b/examples/deal.II/README.MD index cd3f14a3cb..86ba473db5 100644 --- a/examples/deal.II/README.MD +++ b/examples/deal.II/README.MD @@ -14,7 +14,7 @@ make To run the executable, write: ``` -./bps +./bps_01 ``` Optional command-line arguments are shown by adding the command-line argument "--help". diff --git a/examples/deal.II/bps.h b/examples/deal.II/bps.h index 677ed1a81f..13efab8bfe 100644 --- a/examples/deal.II/bps.h +++ b/examples/deal.II/bps.h @@ -18,14 +18,15 @@ // deal.II includes #include +#include +#include +#include +#include #include +#include #include -#include -#include -#include - // libCEED includes #include @@ -92,14 +93,14 @@ struct BPInfo /** * Base class of operators. */ -template +template class OperatorBase { public: /** * deal.II vector type */ - using VectorType = LinearAlgebra::distributed::Vector; + using VectorType = LinearAlgebra::distributed::Vector; /** * Initialize vector. @@ -130,11 +131,11 @@ class OperatorBase /** * Operator implementation using libCEED. */ -template -class OperatorCeed : public OperatorBase +template +class OperatorCeed : public OperatorBase { public: - using VectorType = typename OperatorBase::VectorType; + using VectorType = typename OperatorBase::VectorType; /** * Constructor. @@ -712,180 +713,3 @@ class OperatorCeed : public OperatorBase mutable VectorType src_tmp; mutable VectorType dst_tmp; }; - - - -template -class OperatorDealii : public OperatorBase -{ -public: - using VectorType = typename OperatorBase::VectorType; - - /** - * Constructor. - */ - OperatorDealii(const Mapping &mapping, - const DoFHandler &dof_handler, - const AffineConstraints &constraints, - const Quadrature &quadrature, - const BPType &bp) - : mapping(mapping) - , dof_handler(dof_handler) - , constraints(constraints) - , quadrature(quadrature) - , bp(bp) - { - reinit(); - } - - /** - * Destructor. - */ - ~OperatorDealii() = default; - - /** - * Initialized internal data structures, particularly, MatrixFree. - */ - void - reinit() override - { - // configure MatrixFree - typename MatrixFree::AdditionalData additional_data; - additional_data.tasks_parallel_scheme = - MatrixFree::AdditionalData::TasksParallelScheme::none; - - // create MatrixFree - matrix_free.reinit(mapping, dof_handler, constraints, quadrature, additional_data); - } - - /** - * Matrix-vector product. - */ - void - vmult(VectorType &dst, const VectorType &src) const override - { - if (dof_handler.get_fe().n_components() == 1) - { - matrix_free.cell_loop(&OperatorDealii::do_cell_integral_range<1>, this, dst, src, true); - } - else - { - AssertThrow(dof_handler.get_fe().n_components() == dim, ExcInternalError()); - - matrix_free.cell_loop(&OperatorDealii::do_cell_integral_range, this, dst, src, true); - } - } - - /** - * Initialize vector. - */ - void - initialize_dof_vector(VectorType &vec) const override - { - matrix_free.initialize_dof_vector(vec); - } - - /** - * Compute inverse of diagonal. - */ - void - compute_inverse_diagonal(VectorType &diagonal) const override - { - this->initialize_dof_vector(diagonal); - - if (dof_handler.get_fe().n_components() == 1) - { - MatrixFreeTools::compute_diagonal(matrix_free, - diagonal, - &OperatorDealii::do_cell_integral_local<1>, - this); - } - else - { - AssertThrow(dof_handler.get_fe().n_components() == dim, ExcInternalError()); - - MatrixFreeTools::compute_diagonal(matrix_free, - diagonal, - &OperatorDealii::do_cell_integral_local, - this); - } - - for (auto &i : diagonal) - i = (std::abs(i) > 1.0e-10) ? (1.0 / i) : 1.0; - } - -private: - /** - * Cell integral without vector access. - */ - template - void - do_cell_integral_local(FEEvaluation &phi) const - { - if (bp <= BPType::BP2) // mass matrix - { - phi.evaluate(EvaluationFlags::values); - for (const auto q : phi.quadrature_point_indices()) - phi.submit_value(phi.get_value(q), q); - phi.integrate(EvaluationFlags::values); - } - else // Poisson operator - { - phi.evaluate(EvaluationFlags::gradients); - for (const auto q : phi.quadrature_point_indices()) - phi.submit_gradient(phi.get_gradient(q), q); - phi.integrate(EvaluationFlags::gradients); - } - } - - /** - * Cell integral on a range of cells. - */ - template - void - do_cell_integral_range(const MatrixFree &matrix_free, - VectorType &dst, - const VectorType &src, - const std::pair &range) const - { - FEEvaluation phi(matrix_free, range); - - for (unsigned cell = range.first; cell < range.second; ++cell) - { - phi.reinit(cell); - phi.read_dof_values(src); // read source vector - do_cell_integral_local(phi); // cell integral - phi.distribute_local_to_global(dst); // write to destination vector - } - } - - /** - * Mapping object passed to the constructor. - */ - const Mapping &mapping; - - /** - * DoFHandler object passed to the constructor. - */ - const DoFHandler &dof_handler; - - /** - * Constraints object passed to the constructor. - */ - const AffineConstraints &constraints; - - /** - * Quadrature rule object passed to the constructor. - */ - const Quadrature &quadrature; - - /** - * Selected BP. - */ - const BPType bp; - - /** - * MatrixFree object. - */ - MatrixFree matrix_free; -}; diff --git a/examples/deal.II/bps.cc b/examples/deal.II/bps_01.cc similarity index 57% rename from examples/deal.II/bps.cc rename to examples/deal.II/bps_01.cc index 9d72710d65..36741cc100 100644 --- a/examples/deal.II/bps.cc +++ b/examples/deal.II/bps_01.cc @@ -15,6 +15,8 @@ // // --------------------------------------------------------------------- +#include "bps.h" + // deal.II includes #include #include @@ -27,11 +29,8 @@ #include #include -#include #include #include -#include -#include #include #include @@ -40,17 +39,20 @@ #include #include +#include +#include +#include + // boost #include #include // include operators -#include "bps.h" // Test cases -//TESTARGS(name="BP1") --resource {ceed_resource} --bp BP1 --fe_degree 2 --print_timings 0 -//TESTARGS(name="BP4") --resource {ceed_resource} --bp BP4 --fe_degree 3 --print_timings 0 +// TESTARGS(name="BP1") --resource {ceed_resource} --bp BP1 --fe_degree 2 --print_timings 0 +// TESTARGS(name="BP4") --resource {ceed_resource} --bp BP4 --fe_degree 3 --print_timings 0 /** * Relevant parameters. @@ -61,7 +63,7 @@ struct Parameters unsigned int n_global_refinements = 1; unsigned int fe_degree = 2; bool print_timings = true; - std::string libCEED_resource = "/cpu/self/avx/blocked"; + std::string libCEED_resource = "/cpu/self/avx/blocked"; bool parse(int argc, char *argv[]) @@ -136,6 +138,183 @@ struct Parameters +template +class OperatorDealii : public OperatorBase +{ +public: + using VectorType = typename OperatorBase::VectorType; + + /** + * Constructor. + */ + OperatorDealii(const Mapping &mapping, + const DoFHandler &dof_handler, + const AffineConstraints &constraints, + const Quadrature &quadrature, + const BPType &bp) + : mapping(mapping) + , dof_handler(dof_handler) + , constraints(constraints) + , quadrature(quadrature) + , bp(bp) + { + reinit(); + } + + /** + * Destructor. + */ + ~OperatorDealii() = default; + + /** + * Initialized internal data structures, particularly, MatrixFree. + */ + void + reinit() override + { + // configure MatrixFree + typename MatrixFree::AdditionalData additional_data; + additional_data.tasks_parallel_scheme = + MatrixFree::AdditionalData::TasksParallelScheme::none; + + // create MatrixFree + matrix_free.reinit(mapping, dof_handler, constraints, quadrature, additional_data); + } + + /** + * Matrix-vector product. + */ + void + vmult(VectorType &dst, const VectorType &src) const override + { + if (dof_handler.get_fe().n_components() == 1) + { + matrix_free.cell_loop(&OperatorDealii::do_cell_integral_range<1>, this, dst, src, true); + } + else + { + AssertThrow(dof_handler.get_fe().n_components() == dim, ExcInternalError()); + + matrix_free.cell_loop(&OperatorDealii::do_cell_integral_range, this, dst, src, true); + } + } + + /** + * Initialize vector. + */ + void + initialize_dof_vector(VectorType &vec) const override + { + matrix_free.initialize_dof_vector(vec); + } + + /** + * Compute inverse of diagonal. + */ + void + compute_inverse_diagonal(VectorType &diagonal) const override + { + this->initialize_dof_vector(diagonal); + + if (dof_handler.get_fe().n_components() == 1) + { + MatrixFreeTools::compute_diagonal(matrix_free, + diagonal, + &OperatorDealii::do_cell_integral_local<1>, + this); + } + else + { + AssertThrow(dof_handler.get_fe().n_components() == dim, ExcInternalError()); + + MatrixFreeTools::compute_diagonal(matrix_free, + diagonal, + &OperatorDealii::do_cell_integral_local, + this); + } + + for (auto &i : diagonal) + i = (std::abs(i) > 1.0e-10) ? (1.0 / i) : 1.0; + } + +private: + /** + * Cell integral without vector access. + */ + template + void + do_cell_integral_local(FEEvaluation &phi) const + { + if (bp <= BPType::BP2) // mass matrix + { + phi.evaluate(EvaluationFlags::values); + for (const auto q : phi.quadrature_point_indices()) + phi.submit_value(phi.get_value(q), q); + phi.integrate(EvaluationFlags::values); + } + else // Poisson operator + { + phi.evaluate(EvaluationFlags::gradients); + for (const auto q : phi.quadrature_point_indices()) + phi.submit_gradient(phi.get_gradient(q), q); + phi.integrate(EvaluationFlags::gradients); + } + } + + /** + * Cell integral on a range of cells. + */ + template + void + do_cell_integral_range(const MatrixFree &matrix_free, + VectorType &dst, + const VectorType &src, + const std::pair &range) const + { + FEEvaluation phi(matrix_free, range); + + for (unsigned cell = range.first; cell < range.second; ++cell) + { + phi.reinit(cell); + phi.read_dof_values(src); // read source vector + do_cell_integral_local(phi); // cell integral + phi.distribute_local_to_global(dst); // write to destination vector + } + } + + /** + * Mapping object passed to the constructor. + */ + const Mapping &mapping; + + /** + * DoFHandler object passed to the constructor. + */ + const DoFHandler &dof_handler; + + /** + * Constraints object passed to the constructor. + */ + const AffineConstraints &constraints; + + /** + * Quadrature rule object passed to the constructor. + */ + const Quadrature &quadrature; + + /** + * Selected BP. + */ + const BPType bp; + + /** + * MatrixFree object. + */ + MatrixFree matrix_free; +}; + + + int main(int argc, char *argv[]) { @@ -176,6 +355,8 @@ main(int argc, char *argv[]) DoFHandler dof_handler(tria); dof_handler.distribute_dofs(fe); + DoFRenumbering::support_point_wise(dof_handler); + AffineConstraints constraints; if (!(bp == BPType::BP1 || bp == BPType::BP2)) @@ -185,8 +366,6 @@ main(int argc, char *argv[]) constraints.close(); } - DoFRenumbering::support_point_wise(dof_handler); - const auto test = [&](const std::string &label, const auto &op) { (void)label; diff --git a/examples/deal.II/bps_02.cc b/examples/deal.II/bps_02.cc new file mode 100644 index 0000000000..6ec263fc18 --- /dev/null +++ b/examples/deal.II/bps_02.cc @@ -0,0 +1,465 @@ +// --------------------------------------------------------------------- +// +// Copyright (C) 2023 by the deal.II authors +// +// This file is part of the deal.II library. +// +// The deal.II library is free software; you can use it, redistribute +// it, and/or modify it under the terms of the GNU Lesser General +// Public License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// The full text of the license can be found in the file LICENSE.md at +// the top level directory of deal.II. +// +// Authors: Peter Munch, Martin Kronbichler +// +// --------------------------------------------------------------------- + +#include "bps.h" + +// deal.II includes +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include + +// boost +#include + +#include + +// include operators + +// Test cases +// TESTARGS(name="BP1") --resource {ceed_resource} --bp BP1 --fe_degree 2 --print_timings 0 +// TESTARGS(name="BP4") --resource {ceed_resource} --bp BP5 --fe_degree 3 --print_timings 0 + +/** + * Relevant parameters. + */ +struct Parameters +{ + BPType bp = BPType::BP5; + unsigned int n_global_refinements = 1; + unsigned int fe_degree = 2; + bool print_timings = true; + std::string libCEED_resource = "/cpu/self/avx/blocked"; + + bool + parse(int argc, char *argv[]) + { + if (argc == 1 && (std::string(argv[0]) == "--help")) + { + std::cout << "Usage: ./bp [OPTION]..." << std::endl; + std::cout << std::endl; + std::cout << "--bp name of benchmark (BP1, BP5)" << std::endl; + std::cout << "--n_refinements number of refinements (0-)" << std::endl; + std::cout << "--fe_degree polynomial degree (1-)" << std::endl; + std::cout << "--print_timings name of benchmark (0, 1)" << std::endl; + std::cout << "--resource name of resource (e.g., /cpu/self/avx/blocked)" << std::endl; + + return true; + } + + AssertThrow(argc % 2 == 0, ExcInternalError()); + + while (argc > 0) + { + std::string label(argv[0]); + + if ("--bp" == label) + { + std::string bp_string(argv[1]); + + if (bp_string == "BP1") + bp = BPType::BP1; // with q = p + 1 + else if (bp_string == "BP5") + bp = BPType::BP5; + else + AssertThrow(false, ExcInternalError()); + } + else if ("--n_refinements" == label) + { + n_global_refinements = std::atoi(argv[1]); + } + else if ("--fe_degree" == label) + { + fe_degree = std::atoi(argv[1]); + } + else if ("--print_timings" == label) + { + print_timings = std::atoi(argv[1]); + } + else if ("--resource" == label) + { + libCEED_resource = std::string(argv[1]); + } + else + { + AssertThrow(false, ExcNotImplemented()); + } + + + argc -= 2; + argv += 2; + } + + return false; + } +}; + + + +template +class OperatorDealiiMassQuad +{ +public: + DEAL_II_HOST_DEVICE void + operator()(CUDAWrappers::FEEvaluation *fe_eval, + const int q_point) const + { + fe_eval->submit_value(fe_eval->get_value(q_point), q_point); + } +}; + + + +template +class OperatorDealiiLaplaceQuad +{ +public: + DEAL_II_HOST_DEVICE void + operator()(CUDAWrappers::FEEvaluation *fe_eval, + const int q_point) const + { + fe_eval->submit_gradient(fe_eval->get_gradient(q_point), q_point); + } +}; + + + +template +class OperatorDealiiMassLocal +{ +public: + DEAL_II_HOST_DEVICE void + operator()(const unsigned int cell, + const typename CUDAWrappers::MatrixFree::Data *gpu_data, + CUDAWrappers::SharedData *shared_data, + const Number *src, + Number *dst) const + { + (void)cell; + + CUDAWrappers::FEEvaluation fe_eval(/*cell,*/ gpu_data, + shared_data); + fe_eval.read_dof_values(src); + fe_eval.evaluate(EvaluationFlags::values); + fe_eval.apply_for_each_quad_point(OperatorDealiiMassQuad()); + fe_eval.integrate(EvaluationFlags::values); + fe_eval.distribute_local_to_global(dst); + } + static const unsigned int n_dofs_1d = fe_degree + 1; + static const unsigned int n_local_dofs = Utilities::pow(fe_degree + 1, dim); + static const unsigned int n_q_points = Utilities::pow(fe_degree + 1, dim); +}; + + + +template +class OperatorDealiiLaplaceLocal +{ +public: + DEAL_II_HOST_DEVICE void + operator()(const unsigned int cell, + const typename CUDAWrappers::MatrixFree::Data *gpu_data, + CUDAWrappers::SharedData *shared_data, + const Number *src, + Number *dst) const + { + (void)cell; + + CUDAWrappers::FEEvaluation fe_eval(/*cell,*/ gpu_data, + shared_data); + fe_eval.read_dof_values(src); + fe_eval.evaluate(EvaluationFlags::gradients); + fe_eval.apply_for_each_quad_point(OperatorDealiiLaplaceQuad()); + fe_eval.integrate(EvaluationFlags::gradients); + fe_eval.distribute_local_to_global(dst); + } + static const unsigned int n_dofs_1d = fe_degree + 1; + static const unsigned int n_local_dofs = Utilities::pow(fe_degree + 1, dim); + static const unsigned int n_q_points = Utilities::pow(fe_degree + 1, dim); +}; + + + +template +class OperatorDealii : public OperatorBase +{ +public: + using VectorType = typename OperatorBase::VectorType; + + /** + * Constructor. + */ + OperatorDealii(const Mapping &mapping, + const DoFHandler &dof_handler, + const AffineConstraints &constraints, + const Quadrature &quadrature, + const BPType &bp) + : mapping(mapping) + , dof_handler(dof_handler) + , constraints(constraints) + , quadrature(quadrature) + , bp(bp) + { + reinit(); + } + + /** + * Destructor. + */ + ~OperatorDealii() = default; + + /** + * Initialized internal data structures, particularly, MatrixFree. + */ + void + reinit() override + { + // configure MatrixFree + typename CUDAWrappers::MatrixFree::AdditionalData additional_data; + + if (bp <= BPType::BP2) // mass matrix + additional_data.mapping_update_flags = update_JxW_values | update_values; + else + additional_data.mapping_update_flags = update_JxW_values | update_gradients; + + // create MatrixFree + AssertThrow(quadrature.is_tensor_product(), ExcNotImplemented()); + matrix_free.reinit( + mapping, dof_handler, constraints, quadrature.get_tensor_basis()[0], additional_data); + } + + /** + * Matrix-vector product. + */ + void + vmult(VectorType &dst, const VectorType &src) const override + { + dst = 0.0; + + const unsigned int fe_degree = dof_handler.get_fe().tensor_degree(); + + if (fe_degree == 1) + this->vmult_internal<1>(dst, src); + else if (fe_degree == 2) + this->vmult_internal<2>(dst, src); + else + AssertThrow(false, ExcInternalError()); + + matrix_free.copy_constrained_values(src, dst); + } + + /** + * Initialize vector. + */ + void + initialize_dof_vector(VectorType &vec) const override + { + matrix_free.initialize_dof_vector(vec); + } + + /** + * Compute inverse of diagonal. + */ + void + compute_inverse_diagonal(VectorType &) const override + { + AssertThrow(false, ExcNotImplemented()); + } + +private: + /** + * Templated vmult function. + */ + template + void + vmult_internal(VectorType &dst, const VectorType &src) const + { + if (bp == BPType::BP1) + { + OperatorDealiiMassLocal mass_operator; + matrix_free.cell_loop(mass_operator, src, dst); + } + else if (bp == BPType::BP5) + { + OperatorDealiiLaplaceLocal local_operator; + matrix_free.cell_loop(local_operator, src, dst); + } + else + { + AssertThrow(false, ExcNotImplemented()); + } + } + + /** + * Mapping object passed to the constructor. + */ + const Mapping &mapping; + + /** + * DoFHandler object passed to the constructor. + */ + const DoFHandler &dof_handler; + + /** + * Constraints object passed to the constructor. + */ + const AffineConstraints &constraints; + + /** + * Quadrature rule object passed to the constructor. + */ + const Quadrature &quadrature; + + /** + * Selected BP. + */ + const BPType bp; + + /** + * MatrixFree object. + */ + CUDAWrappers::MatrixFree matrix_free; +}; + + + +int +main(int argc, char *argv[]) +{ + Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, 1); + + Parameters params; + if (params.parse(argc - 1, argv + 1)) + return 0; + + ConditionalOStream pout(std::cout, Utilities::MPI::this_mpi_process(MPI_COMM_WORLD) == 0); + + // configuration + const BPType bp = params.bp; + + using Number = double; + using VectorType = LinearAlgebra::distributed::Vector; + const unsigned int dim = 2; + const unsigned int fe_degree = params.fe_degree; + const unsigned int n_q_points = fe_degree + 1; + const unsigned int n_refinements = params.n_global_refinements; + const unsigned int n_components = 1; + + // create mapping, quadrature, fe, mesh, ... + MappingQ1 mapping; + QGauss quadrature(n_q_points); + FESystem fe(FE_Q(fe_degree), n_components); + +#ifdef DEAL_II_WITH_P4EST + parallel::distributed::Triangulation tria(MPI_COMM_WORLD); +#else + parallel::shared::Triangulation tria(MPI_COMM_WORLD, ::Triangulation::none, true); +#endif + + GridGenerator::hyper_cube(tria); + tria.refine_global(n_refinements); + + DoFHandler dof_handler(tria); + dof_handler.distribute_dofs(fe); + + DoFRenumbering::support_point_wise(dof_handler); + + AffineConstraints constraints; + + if (!(bp == BPType::BP1 || bp == BPType::BP2)) + { + // for stiffness matrix + DoFTools::make_zero_boundary_constraints(dof_handler, constraints); + constraints.close(); + } + + const auto test = [&](const std::string &label, const auto &op) { + (void)label; + + // initialize vector + VectorType u, v; + op.initialize_dof_vector(u); + op.initialize_dof_vector(v); + u = 1.0; + + constraints.set_zero(u); + + // perform matrix-vector product + op.vmult(v, u); + + // create solver + ReductionControl reduction_control(100, 1e-20, 1e-6); + + std::chrono::time_point now; + + bool not_converged = false; + + try + { + // solve problem + SolverCG solver(reduction_control); + now = std::chrono::system_clock::now(); + solver.solve(op, v, u, PreconditionIdentity()); + } + catch (const SolverControl::NoConvergence &) + { + pout << "Error: solver failed to converge with" << std::endl; + not_converged = true; + } + + + const auto time = + std::chrono::duration_cast(std::chrono::system_clock::now() - now) + .count() / + 1e9; + + + if (params.print_timings || not_converged) + { + pout << label << ": " << reduction_control.last_step() << " " << v.l2_norm() << " " + << (params.print_timings ? time : 0.0) << std::endl; + } + }; + + // create and test the libCEED operator + OperatorCeed op_ceed( + mapping, dof_handler, constraints, quadrature, bp, params.libCEED_resource); + test("ceed", op_ceed); + + // create and test a native deal.II operator + OperatorDealii op_dealii(mapping, dof_handler, constraints, quadrature, bp); + test("dealii", op_dealii); +} From a399ec2d83c06b89f86d20f0af2d145d0b52f635 Mon Sep 17 00:00:00 2001 From: Peter Munch Date: Tue, 16 Sep 2025 20:38:43 +0200 Subject: [PATCH 2/6] Update code --- examples/deal.II/CMakeLists.txt | 2 +- examples/deal.II/bps_02.cc | 46 +++++++++++++-------------------- 2 files changed, 19 insertions(+), 29 deletions(-) diff --git a/examples/deal.II/CMakeLists.txt b/examples/deal.II/CMakeLists.txt index d7671eba8a..86b30d6fd7 100644 --- a/examples/deal.II/CMakeLists.txt +++ b/examples/deal.II/CMakeLists.txt @@ -1,4 +1,4 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 2.8.8) +CMAKE_MINIMUM_REQUIRED(VERSION 3.5) FIND_PACKAGE(deal.II 8.0 QUIET HINTS ${deal.II_DIR} ${DEAL_II_DIR} ../ ../../ $ENV{DEAL_II_DIR} diff --git a/examples/deal.II/bps_02.cc b/examples/deal.II/bps_02.cc index 6ec263fc18..cbb44105b2 100644 --- a/examples/deal.II/bps_02.cc +++ b/examples/deal.II/bps_02.cc @@ -39,8 +39,8 @@ #include #include -#include -#include +#include +#include // boost #include @@ -134,8 +134,8 @@ class OperatorDealiiMassQuad { public: DEAL_II_HOST_DEVICE void - operator()(CUDAWrappers::FEEvaluation *fe_eval, - const int q_point) const + operator()(Portable::FEEvaluation *fe_eval, + const int q_point) const { fe_eval->submit_value(fe_eval->get_value(q_point), q_point); } @@ -148,8 +148,8 @@ class OperatorDealiiLaplaceQuad { public: DEAL_II_HOST_DEVICE void - operator()(CUDAWrappers::FEEvaluation *fe_eval, - const int q_point) const + operator()(Portable::FEEvaluation *fe_eval, + const int q_point) const { fe_eval->submit_gradient(fe_eval->get_gradient(q_point), q_point); } @@ -162,23 +162,18 @@ class OperatorDealiiMassLocal { public: DEAL_II_HOST_DEVICE void - operator()(const unsigned int cell, - const typename CUDAWrappers::MatrixFree::Data *gpu_data, - CUDAWrappers::SharedData *shared_data, - const Number *src, - Number *dst) const + operator()(const typename Portable::MatrixFree::Data *data, + const Portable::DeviceVector &src, + Portable::DeviceVector &dst) const { - (void)cell; - - CUDAWrappers::FEEvaluation fe_eval(/*cell,*/ gpu_data, - shared_data); + Portable::FEEvaluation fe_eval(data); fe_eval.read_dof_values(src); fe_eval.evaluate(EvaluationFlags::values); fe_eval.apply_for_each_quad_point(OperatorDealiiMassQuad()); fe_eval.integrate(EvaluationFlags::values); fe_eval.distribute_local_to_global(dst); } - static const unsigned int n_dofs_1d = fe_degree + 1; + static const unsigned int n_local_dofs = Utilities::pow(fe_degree + 1, dim); static const unsigned int n_q_points = Utilities::pow(fe_degree + 1, dim); }; @@ -190,23 +185,18 @@ class OperatorDealiiLaplaceLocal { public: DEAL_II_HOST_DEVICE void - operator()(const unsigned int cell, - const typename CUDAWrappers::MatrixFree::Data *gpu_data, - CUDAWrappers::SharedData *shared_data, - const Number *src, - Number *dst) const + operator()(const typename Portable::MatrixFree::Data *data, + const Portable::DeviceVector &src, + Portable::DeviceVector &dst) const { - (void)cell; - - CUDAWrappers::FEEvaluation fe_eval(/*cell,*/ gpu_data, - shared_data); + Portable::FEEvaluation fe_eval(data); fe_eval.read_dof_values(src); fe_eval.evaluate(EvaluationFlags::gradients); fe_eval.apply_for_each_quad_point(OperatorDealiiLaplaceQuad()); fe_eval.integrate(EvaluationFlags::gradients); fe_eval.distribute_local_to_global(dst); } - static const unsigned int n_dofs_1d = fe_degree + 1; + static const unsigned int n_local_dofs = Utilities::pow(fe_degree + 1, dim); static const unsigned int n_q_points = Utilities::pow(fe_degree + 1, dim); }; @@ -248,7 +238,7 @@ class OperatorDealii : public OperatorBase reinit() override { // configure MatrixFree - typename CUDAWrappers::MatrixFree::AdditionalData additional_data; + typename Portable::MatrixFree::AdditionalData additional_data; if (bp <= BPType::BP2) // mass matrix additional_data.mapping_update_flags = update_JxW_values | update_values; @@ -351,7 +341,7 @@ class OperatorDealii : public OperatorBase /** * MatrixFree object. */ - CUDAWrappers::MatrixFree matrix_free; + Portable::MatrixFree matrix_free; }; From 2ea90d0c06c21bd092db2c674886bb7aa30a768b Mon Sep 17 00:00:00 2001 From: Peter Munch Date: Tue, 16 Sep 2025 20:59:59 +0200 Subject: [PATCH 3/6] Work on number of components --- examples/deal.II/bps_02.cc | 72 ++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/examples/deal.II/bps_02.cc b/examples/deal.II/bps_02.cc index cbb44105b2..d680c8db94 100644 --- a/examples/deal.II/bps_02.cc +++ b/examples/deal.II/bps_02.cc @@ -92,8 +92,16 @@ struct Parameters if (bp_string == "BP1") bp = BPType::BP1; // with q = p + 1 + else if (bp_string == "BP2") + bp = BPType::BP2; // with q = p + 1 + else if (bp_string == "BP3") + bp = BPType::BP3; // with q = p + 1 + else if (bp_string == "BP4") + bp = BPType::BP4; // with q = p + 1 else if (bp_string == "BP5") bp = BPType::BP5; + else if (bp_string == "BP6") + bp = BPType::BP6; else AssertThrow(false, ExcInternalError()); } @@ -129,13 +137,13 @@ struct Parameters -template +template class OperatorDealiiMassQuad { public: DEAL_II_HOST_DEVICE void - operator()(Portable::FEEvaluation *fe_eval, - const int q_point) const + operator()(Portable::FEEvaluation *fe_eval, + const int q_point) const { fe_eval->submit_value(fe_eval->get_value(q_point), q_point); } @@ -143,13 +151,13 @@ class OperatorDealiiMassQuad -template +template class OperatorDealiiLaplaceQuad { public: DEAL_II_HOST_DEVICE void - operator()(Portable::FEEvaluation *fe_eval, - const int q_point) const + operator()(Portable::FEEvaluation *fe_eval, + const int q_point) const { fe_eval->submit_gradient(fe_eval->get_gradient(q_point), q_point); } @@ -157,7 +165,7 @@ class OperatorDealiiLaplaceQuad -template +template class OperatorDealiiMassLocal { public: @@ -166,21 +174,22 @@ class OperatorDealiiMassLocal const Portable::DeviceVector &src, Portable::DeviceVector &dst) const { - Portable::FEEvaluation fe_eval(data); + Portable::FEEvaluation fe_eval(data); fe_eval.read_dof_values(src); fe_eval.evaluate(EvaluationFlags::values); - fe_eval.apply_for_each_quad_point(OperatorDealiiMassQuad()); + fe_eval.apply_for_each_quad_point( + OperatorDealiiMassQuad()); fe_eval.integrate(EvaluationFlags::values); fe_eval.distribute_local_to_global(dst); } - static const unsigned int n_local_dofs = Utilities::pow(fe_degree + 1, dim); + static const unsigned int n_local_dofs = Utilities::pow(fe_degree + 1, dim) * n_components; static const unsigned int n_q_points = Utilities::pow(fe_degree + 1, dim); }; -template +template class OperatorDealiiLaplaceLocal { public: @@ -189,15 +198,16 @@ class OperatorDealiiLaplaceLocal const Portable::DeviceVector &src, Portable::DeviceVector &dst) const { - Portable::FEEvaluation fe_eval(data); + Portable::FEEvaluation fe_eval(data); fe_eval.read_dof_values(src); fe_eval.evaluate(EvaluationFlags::gradients); - fe_eval.apply_for_each_quad_point(OperatorDealiiLaplaceQuad()); + fe_eval.apply_for_each_quad_point( + OperatorDealiiLaplaceQuad()); fe_eval.integrate(EvaluationFlags::gradients); fe_eval.distribute_local_to_global(dst); } - static const unsigned int n_local_dofs = Utilities::pow(fe_degree + 1, dim); + static const unsigned int n_local_dofs = Utilities::pow(fe_degree + 1, dim) * n_components; static const unsigned int n_q_points = Utilities::pow(fe_degree + 1, dim); }; @@ -259,12 +269,17 @@ class OperatorDealii : public OperatorBase { dst = 0.0; - const unsigned int fe_degree = dof_handler.get_fe().tensor_degree(); - - if (fe_degree == 1) - this->vmult_internal<1>(dst, src); - else if (fe_degree == 2) - this->vmult_internal<2>(dst, src); + const unsigned int n_components = dof_handler.get_fe().n_components(); + const unsigned int fe_degree = dof_handler.get_fe().tensor_degree(); + + if (n_components == 1 && fe_degree == 1) + this->vmult_internal<1, 1>(dst, src); + else if (n_components == 1 && fe_degree == 2) + this->vmult_internal<1, 2>(dst, src); + else if (n_components == dim && fe_degree == 1) + this->vmult_internal(dst, src); + else if (n_components == dim && fe_degree == 2) + this->vmult_internal(dst, src); else AssertThrow(false, ExcInternalError()); @@ -293,23 +308,19 @@ class OperatorDealii : public OperatorBase /** * Templated vmult function. */ - template + template void vmult_internal(VectorType &dst, const VectorType &src) const { - if (bp == BPType::BP1) + if (bp <= BPType::BP2) // mass matrix { - OperatorDealiiMassLocal mass_operator; + OperatorDealiiMassLocal mass_operator; matrix_free.cell_loop(mass_operator, src, dst); } - else if (bp == BPType::BP5) - { - OperatorDealiiLaplaceLocal local_operator; - matrix_free.cell_loop(local_operator, src, dst); - } else { - AssertThrow(false, ExcNotImplemented()); + OperatorDealiiLaplaceLocal local_operator; + matrix_free.cell_loop(local_operator, src, dst); } } @@ -366,7 +377,8 @@ main(int argc, char *argv[]) const unsigned int fe_degree = params.fe_degree; const unsigned int n_q_points = fe_degree + 1; const unsigned int n_refinements = params.n_global_refinements; - const unsigned int n_components = 1; + const unsigned int n_components = + (bp == BPType::BP1 || bp == BPType::BP3 || bp == BPType::BP5) ? 1 : dim; // create mapping, quadrature, fe, mesh, ... MappingQ1 mapping; From c331f7ad625ffe0f072b25691bef9de6f936f238 Mon Sep 17 00:00:00 2001 From: Peter Munch Date: Tue, 16 Sep 2025 21:05:20 +0200 Subject: [PATCH 4/6] Work on number of quadrature points --- examples/deal.II/bps_02.cc | 54 ++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/examples/deal.II/bps_02.cc b/examples/deal.II/bps_02.cc index d680c8db94..9fd89aad53 100644 --- a/examples/deal.II/bps_02.cc +++ b/examples/deal.II/bps_02.cc @@ -137,12 +137,12 @@ struct Parameters -template +template class OperatorDealiiMassQuad { public: DEAL_II_HOST_DEVICE void - operator()(Portable::FEEvaluation *fe_eval, + operator()(Portable::FEEvaluation *fe_eval, const int q_point) const { fe_eval->submit_value(fe_eval->get_value(q_point), q_point); @@ -151,12 +151,12 @@ class OperatorDealiiMassQuad -template +template class OperatorDealiiLaplaceQuad { public: DEAL_II_HOST_DEVICE void - operator()(Portable::FEEvaluation *fe_eval, + operator()(Portable::FEEvaluation *fe_eval, const int q_point) const { fe_eval->submit_gradient(fe_eval->get_gradient(q_point), q_point); @@ -165,7 +165,7 @@ class OperatorDealiiLaplaceQuad -template +template class OperatorDealiiMassLocal { public: @@ -174,22 +174,22 @@ class OperatorDealiiMassLocal const Portable::DeviceVector &src, Portable::DeviceVector &dst) const { - Portable::FEEvaluation fe_eval(data); + Portable::FEEvaluation fe_eval(data); fe_eval.read_dof_values(src); fe_eval.evaluate(EvaluationFlags::values); fe_eval.apply_for_each_quad_point( - OperatorDealiiMassQuad()); + OperatorDealiiMassQuad()); fe_eval.integrate(EvaluationFlags::values); fe_eval.distribute_local_to_global(dst); } static const unsigned int n_local_dofs = Utilities::pow(fe_degree + 1, dim) * n_components; - static const unsigned int n_q_points = Utilities::pow(fe_degree + 1, dim); + static const unsigned int n_q_points = Utilities::pow(n_q_points_1d, dim); }; -template +template class OperatorDealiiLaplaceLocal { public: @@ -198,17 +198,17 @@ class OperatorDealiiLaplaceLocal const Portable::DeviceVector &src, Portable::DeviceVector &dst) const { - Portable::FEEvaluation fe_eval(data); + Portable::FEEvaluation fe_eval(data); fe_eval.read_dof_values(src); fe_eval.evaluate(EvaluationFlags::gradients); fe_eval.apply_for_each_quad_point( - OperatorDealiiLaplaceQuad()); + OperatorDealiiLaplaceQuad()); fe_eval.integrate(EvaluationFlags::gradients); fe_eval.distribute_local_to_global(dst); } static const unsigned int n_local_dofs = Utilities::pow(fe_degree + 1, dim) * n_components; - static const unsigned int n_q_points = Utilities::pow(fe_degree + 1, dim); + static const unsigned int n_q_points = Utilities::pow(n_q_points_1d, dim); }; @@ -269,17 +269,18 @@ class OperatorDealii : public OperatorBase { dst = 0.0; - const unsigned int n_components = dof_handler.get_fe().n_components(); - const unsigned int fe_degree = dof_handler.get_fe().tensor_degree(); - - if (n_components == 1 && fe_degree == 1) - this->vmult_internal<1, 1>(dst, src); - else if (n_components == 1 && fe_degree == 2) - this->vmult_internal<1, 2>(dst, src); - else if (n_components == dim && fe_degree == 1) - this->vmult_internal(dst, src); - else if (n_components == dim && fe_degree == 2) - this->vmult_internal(dst, src); + const unsigned int n_components = dof_handler.get_fe().n_components(); + const unsigned int fe_degree = dof_handler.get_fe().tensor_degree(); + const unsigned int n_q_points_1d = quadrature.get_tensor_basis()[0].size(); + + if (n_components == 1 && fe_degree == 1 && n_q_points_1d == 2) + this->vmult_internal<1, 1, 2>(dst, src); + else if (n_components == 1 && fe_degree == 2 && n_q_points_1d == 3) + this->vmult_internal<1, 2, 3>(dst, src); + else if (n_components == dim && fe_degree == 1 && n_q_points_1d == 2) + this->vmult_internal(dst, src); + else if (n_components == dim && fe_degree == 2 && n_q_points_1d == 3) + this->vmult_internal(dst, src); else AssertThrow(false, ExcInternalError()); @@ -308,18 +309,19 @@ class OperatorDealii : public OperatorBase /** * Templated vmult function. */ - template + template void vmult_internal(VectorType &dst, const VectorType &src) const { if (bp <= BPType::BP2) // mass matrix { - OperatorDealiiMassLocal mass_operator; + OperatorDealiiMassLocal mass_operator; matrix_free.cell_loop(mass_operator, src, dst); } else { - OperatorDealiiLaplaceLocal local_operator; + OperatorDealiiLaplaceLocal + local_operator; matrix_free.cell_loop(local_operator, src, dst); } } From 788b6060cfde9d0b18cfbcd8edd9da9f668666bc Mon Sep 17 00:00:00 2001 From: Peter Munch Date: Tue, 16 Sep 2025 21:09:44 +0200 Subject: [PATCH 5/6] Continue --- examples/deal.II/bps_02.cc | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/examples/deal.II/bps_02.cc b/examples/deal.II/bps_02.cc index 9fd89aad53..5927bba726 100644 --- a/examples/deal.II/bps_02.cc +++ b/examples/deal.II/bps_02.cc @@ -71,7 +71,7 @@ struct Parameters { std::cout << "Usage: ./bp [OPTION]..." << std::endl; std::cout << std::endl; - std::cout << "--bp name of benchmark (BP1, BP5)" << std::endl; + std::cout << "--bp name of benchmark (BP1-BP6)" << std::endl; std::cout << "--n_refinements number of refinements (0-)" << std::endl; std::cout << "--fe_degree polynomial degree (1-)" << std::endl; std::cout << "--print_timings name of benchmark (0, 1)" << std::endl; @@ -91,13 +91,13 @@ struct Parameters std::string bp_string(argv[1]); if (bp_string == "BP1") - bp = BPType::BP1; // with q = p + 1 + bp = BPType::BP1; else if (bp_string == "BP2") - bp = BPType::BP2; // with q = p + 1 + bp = BPType::BP2; else if (bp_string == "BP3") - bp = BPType::BP3; // with q = p + 1 + bp = BPType::BP3; else if (bp_string == "BP4") - bp = BPType::BP4; // with q = p + 1 + bp = BPType::BP4; else if (bp_string == "BP5") bp = BPType::BP5; else if (bp_string == "BP6") @@ -281,6 +281,14 @@ class OperatorDealii : public OperatorBase this->vmult_internal(dst, src); else if (n_components == dim && fe_degree == 2 && n_q_points_1d == 3) this->vmult_internal(dst, src); + else if (n_components == 1 && fe_degree == 1 && n_q_points_1d == 3) + this->vmult_internal<1, 1, 3>(dst, src); + else if (n_components == 1 && fe_degree == 2 && n_q_points_1d == 4) + this->vmult_internal<1, 2, 4>(dst, src); + else if (n_components == dim && fe_degree == 1 && n_q_points_1d == 3) + this->vmult_internal(dst, src); + else if (n_components == dim && fe_degree == 2 && n_q_points_1d == 4) + this->vmult_internal(dst, src); else AssertThrow(false, ExcInternalError()); @@ -377,7 +385,7 @@ main(int argc, char *argv[]) using VectorType = LinearAlgebra::distributed::Vector; const unsigned int dim = 2; const unsigned int fe_degree = params.fe_degree; - const unsigned int n_q_points = fe_degree + 1; + const unsigned int n_q_points = (bp <= BPType::BP4) ? (fe_degree + 2) : (fe_degree + 1); const unsigned int n_refinements = params.n_global_refinements; const unsigned int n_components = (bp == BPType::BP1 || bp == BPType::BP3 || bp == BPType::BP5) ? 1 : dim; From 09fa620467dc57b7152a40264122aa94946bdfd5 Mon Sep 17 00:00:00 2001 From: Peter Munch Date: Tue, 16 Sep 2025 21:23:38 +0200 Subject: [PATCH 6/6] Compute diagonal --- examples/deal.II/bps_02.cc | 66 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/examples/deal.II/bps_02.cc b/examples/deal.II/bps_02.cc index 5927bba726..471e1f6eb0 100644 --- a/examples/deal.II/bps_02.cc +++ b/examples/deal.II/bps_02.cc @@ -41,6 +41,7 @@ #include #include +#include // boost #include @@ -308,9 +309,32 @@ class OperatorDealii : public OperatorBase * Compute inverse of diagonal. */ void - compute_inverse_diagonal(VectorType &) const override + compute_inverse_diagonal(VectorType &diagonal) const override { - AssertThrow(false, ExcNotImplemented()); + this->initialize_dof_vector(diagonal); + + const unsigned int n_components = dof_handler.get_fe().n_components(); + const unsigned int fe_degree = dof_handler.get_fe().tensor_degree(); + const unsigned int n_q_points_1d = quadrature.get_tensor_basis()[0].size(); + + if (n_components == 1 && fe_degree == 1 && n_q_points_1d == 2) + this->compute_inverse_diagonal_internal<1, 1, 2>(diagonal); + else if (n_components == 1 && fe_degree == 2 && n_q_points_1d == 3) + this->compute_inverse_diagonal_internal<1, 2, 3>(diagonal); + else if (n_components == dim && fe_degree == 1 && n_q_points_1d == 2) + this->compute_inverse_diagonal_internal(diagonal); + else if (n_components == dim && fe_degree == 2 && n_q_points_1d == 3) + this->compute_inverse_diagonal_internal(diagonal); + else if (n_components == 1 && fe_degree == 1 && n_q_points_1d == 3) + this->compute_inverse_diagonal_internal<1, 1, 3>(diagonal); + else if (n_components == 1 && fe_degree == 2 && n_q_points_1d == 4) + this->compute_inverse_diagonal_internal<1, 2, 4>(diagonal); + else if (n_components == dim && fe_degree == 1 && n_q_points_1d == 3) + this->compute_inverse_diagonal_internal(diagonal); + else if (n_components == dim && fe_degree == 2 && n_q_points_1d == 4) + this->compute_inverse_diagonal_internal(diagonal); + else + AssertThrow(false, ExcInternalError()); } private: @@ -334,6 +358,38 @@ class OperatorDealii : public OperatorBase } } + /** + * Templated compute_inverse_diagonal function. + */ + template + void + compute_inverse_diagonal_internal(VectorType &diagonal) const + { + if (bp <= BPType::BP2) // mass matrix + { + OperatorDealiiMassQuad op_quad; + + MatrixFreeTools::compute_diagonal( + matrix_free, diagonal, op_quad, EvaluationFlags::values, EvaluationFlags::values); + } + else + { + OperatorDealiiLaplaceQuad op_quad; + + MatrixFreeTools::compute_diagonal( + matrix_free, diagonal, op_quad, EvaluationFlags::gradients, EvaluationFlags::gradients); + } + + + Number *diagonal_ptr = diagonal.get_values(); + + Kokkos::parallel_for( + "lethe::invert_vector", + Kokkos::RangePolicy( + 0, diagonal.locally_owned_size()), + KOKKOS_LAMBDA(int i) { diagonal_ptr[i] = 1.0 / diagonal_ptr[i]; }); + } + /** * Mapping object passed to the constructor. */ @@ -435,6 +491,10 @@ main(int argc, char *argv[]) // create solver ReductionControl reduction_control(100, 1e-20, 1e-6); + // create preconditioner + DiagonalMatrix diagonal_matrix; + op.compute_inverse_diagonal(diagonal_matrix.get_vector()); + std::chrono::time_point now; bool not_converged = false; @@ -444,7 +504,7 @@ main(int argc, char *argv[]) // solve problem SolverCG solver(reduction_control); now = std::chrono::system_clock::now(); - solver.solve(op, v, u, PreconditionIdentity()); + solver.solve(op, v, u, diagonal_matrix); } catch (const SolverControl::NoConvergence &) {