From 59fd95855392c76bdad0eb669981561adc06cdc3 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Wed, 16 Jul 2025 17:58:39 +0200 Subject: [PATCH] add synchronization loggable event --- core/device_hooks/cuda_hooks.cpp | 4 +-- core/device_hooks/dpcpp_hooks.cpp | 4 +-- core/device_hooks/hip_hooks.cpp | 4 +-- core/log/profiler_hook.cpp | 20 +++++++++-- cuda/base/executor.cpp | 4 +-- devices/omp/executor.cpp | 4 +-- dpcpp/base/executor.dp.cpp | 4 +-- hip/base/executor.hip.cpp | 4 +-- include/ginkgo/core/base/executor.hpp | 41 +++++++++++++++++------ include/ginkgo/core/log/logger.hpp | 19 ++++++++++- include/ginkgo/core/log/profiler_hook.hpp | 6 +++- 11 files changed, 85 insertions(+), 29 deletions(-) diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp index 4124ac2bea5..d69e3cf868f 100644 --- a/core/device_hooks/cuda_hooks.cpp +++ b/core/device_hooks/cuda_hooks.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -141,7 +141,7 @@ void CudaExecutor::raw_copy_to(const DpcppExecutor*, size_type num_bytes, GKO_NOT_COMPILED(cuda); -void CudaExecutor::synchronize() const GKO_NOT_COMPILED(cuda); +void CudaExecutor::synchronize_impl() const GKO_NOT_COMPILED(cuda); scoped_device_id_guard CudaExecutor::get_scoped_device_id_guard() const diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp index 470fd9befc4..1a4bf40abed 100644 --- a/core/device_hooks/dpcpp_hooks.cpp +++ b/core/device_hooks/dpcpp_hooks.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -84,7 +84,7 @@ void DpcppExecutor::raw_copy_to(const DpcppExecutor*, size_type num_bytes, GKO_NOT_COMPILED(dpcpp); -void DpcppExecutor::synchronize() const GKO_NOT_COMPILED(dpcpp); +void DpcppExecutor::synchronize_impl() const GKO_NOT_COMPILED(dpcpp); scoped_device_id_guard DpcppExecutor::get_scoped_device_id_guard() const diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp index 7f3497e8020..f5c5a955e4e 100644 --- a/core/device_hooks/hip_hooks.cpp +++ b/core/device_hooks/hip_hooks.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -140,7 +140,7 @@ void HipExecutor::raw_copy_to(const DpcppExecutor*, size_type num_bytes, GKO_NOT_COMPILED(hip); -void HipExecutor::synchronize() const GKO_NOT_COMPILED(hip); +void HipExecutor::synchronize_impl() const GKO_NOT_COMPILED(hip); scoped_device_id_guard HipExecutor::get_scoped_device_id_guard() const diff --git a/core/log/profiler_hook.cpp b/core/log/profiler_hook.cpp index e3ed0ad8299..56a83c8693b 100644 --- a/core/log/profiler_hook.cpp +++ b/core/log/profiler_hook.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -280,6 +280,18 @@ void ProfilerHook::on_iteration_complete( } +void ProfilerHook::on_synchronize_started(const gko::Executor*) const +{ + this->begin_hook_("synchronize", profile_event_category::operation); +} + + +void ProfilerHook::on_synchronize_completed(const gko::Executor*) const +{ + this->end_hook_("synchronize", profile_event_category::operation); +} + + bool ProfilerHook::needs_propagation() const { return true; } @@ -306,10 +318,12 @@ void ProfilerHook::set_synchronization(bool synchronize) void ProfilerHook::maybe_synchronize(const Executor* exec) const { if (synchronize_) { - profiling_scope_guard sync_guard{"synchronize", + profiling_scope_guard sync_guard{"logger_synchronize", profile_event_category::internal, begin_hook_, end_hook_}; - exec->synchronize(); + // we call synchronize_impl not synchronize to separate the syncrhonize + // caller. + exec->synchronize_impl(); } } diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index 8380eddcf1b..8bd7de3d430 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -165,7 +165,7 @@ void CudaExecutor::raw_copy_to(const CudaExecutor* dest, size_type num_bytes, } -void CudaExecutor::synchronize() const +void CudaExecutor::synchronize_impl() const { detail::cuda_scoped_device_id_guard g(this->get_device_id()); GKO_ASSERT_NO_CUDA_ERRORS(cudaStreamSynchronize(this->get_stream())); diff --git a/devices/omp/executor.cpp b/devices/omp/executor.cpp index 54b9c9c36be..8eb5541e10d 100644 --- a/devices/omp/executor.cpp +++ b/devices/omp/executor.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -58,7 +58,7 @@ void OmpExecutor::raw_copy_to(const OmpExecutor*, size_type num_bytes, } -void OmpExecutor::synchronize() const +void OmpExecutor::synchronize_impl() const { // This is a no-op for single-threaded OMP // TODO: change when adding support for multi-threaded OMP execution diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index 06a2643f926..67fb67cbcc2 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -153,7 +153,7 @@ void DpcppExecutor::raw_copy_to(const DpcppExecutor* dest, size_type num_bytes, } -void DpcppExecutor::synchronize() const { queue_->wait_and_throw(); } +void DpcppExecutor::synchronize_impl() const { queue_->wait_and_throw(); } scoped_device_id_guard DpcppExecutor::get_scoped_device_id_guard() const { diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index 769d650d984..b880c56e54c 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -161,7 +161,7 @@ void HipExecutor::raw_copy_to(const HipExecutor* dest, size_type num_bytes, } -void HipExecutor::synchronize() const +void HipExecutor::synchronize_impl() const { detail::hip_scoped_device_id_guard g(this->get_device_id()); GKO_ASSERT_NO_HIP_ERRORS(hipStreamSynchronize(this->get_stream())); diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 224860b72b7..11dfcb950ec 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -141,6 +141,15 @@ class ExecutorBase; } // namespace detail +namespace log { + + +class ProfilerHook; + + +} // namespace log + + /** * Operations can be used to define functionalities whose implementations differ * among devices. @@ -619,6 +628,8 @@ class Executor : public log::EnableLogging { GKO_ENABLE_FOR_ALL_EXECUTORS(GKO_DECLARE_EXECUTOR_FRIEND); friend class ReferenceExecutor; + friend class log::ProfilerHook; + public: virtual ~Executor() = default; @@ -827,7 +838,12 @@ class Executor : public log::EnableLogging { /** * Synchronize the operations launched on the executor with its master. */ - virtual void synchronize() const = 0; + void synchronize() const + { + this->template log(this); + this->synchronize_impl(); + this->template log(this); + } /** * @copydoc Loggable::add_logger @@ -1023,6 +1039,11 @@ class Executor : public log::EnableLogging { */ const exec_info& get_exec_info() const { return this->exec_info_; } + /** + * Synchronize the operations launched on the executor with its master. + */ + virtual void synchronize_impl() const = 0; + /** * Allocates raw memory in this Executor. * @@ -1404,8 +1425,6 @@ class OmpExecutor : public detail::ExecutorBase, std::shared_ptr get_master() const noexcept override; - void synchronize() const override; - int get_num_cores() const { return this->get_exec_info().num_computing_units; @@ -1431,6 +1450,8 @@ class OmpExecutor : public detail::ExecutorBase, void populate_exec_info(const machine_topology* mach_topo) override; + void synchronize_impl() const override; + void* raw_alloc(size_type size) const override; void raw_free(void* ptr) const noexcept override; @@ -1588,8 +1609,6 @@ class CudaExecutor : public detail::ExecutorBase, std::shared_ptr get_master() const noexcept override; - void synchronize() const override; - scoped_device_id_guard get_scoped_device_id_guard() const override; std::string get_description() const override; @@ -1731,6 +1750,8 @@ class CudaExecutor : public detail::ExecutorBase, this->init_handles(); } + void synchronize_impl() const override; + void* raw_alloc(size_type size) const override; void raw_free(void* ptr) const noexcept override; @@ -1812,8 +1833,6 @@ class HipExecutor : public detail::ExecutorBase, std::shared_ptr get_master() const noexcept override; - void synchronize() const override; - scoped_device_id_guard get_scoped_device_id_guard() const override; std::string get_description() const override; @@ -1949,6 +1968,8 @@ class HipExecutor : public detail::ExecutorBase, this->init_handles(); } + void synchronize_impl() const override; + void* raw_alloc(size_type size) const override; void raw_free(void* ptr) const noexcept override; @@ -2017,8 +2038,6 @@ class DpcppExecutor : public detail::ExecutorBase, std::shared_ptr get_master() const noexcept override; - void synchronize() const override; - scoped_device_id_guard get_scoped_device_id_guard() const override; std::string get_description() const override; @@ -2132,6 +2151,8 @@ class DpcppExecutor : public detail::ExecutorBase, void populate_exec_info(const machine_topology* mach_topo) override; + void synchronize_impl() const override; + void* raw_alloc(size_type size) const override; void raw_free(void* ptr) const noexcept override; diff --git a/include/ginkgo/core/log/logger.hpp b/include/ginkgo/core/log/logger.hpp index 23bc8a227b7..455cba68a15 100644 --- a/include/ginkgo/core/log/logger.hpp +++ b/include/ginkgo/core/log/logger.hpp @@ -617,6 +617,22 @@ public: \ #endif +public: + /** + * synchronize started event. + * + * @param exec the executor used + */ + GKO_LOGGER_REGISTER_EVENT(27, synchronize_started, const Executor* exec) + + /** + * synchronize completed event. + * + * @param exec the executor used + */ + GKO_LOGGER_REGISTER_EVENT(28, synchronize_completed, const Executor* exec) + + public: #undef GKO_LOGGER_REGISTER_EVENT @@ -626,7 +642,8 @@ public: \ static constexpr mask_type executor_events_mask = allocation_started_mask | allocation_completed_mask | free_started_mask | free_completed_mask | copy_started_mask | - copy_completed_mask; + copy_completed_mask | synchronize_started_mask | + synchronize_completed_mask; /** * Bitset Mask which activates all operation events diff --git a/include/ginkgo/core/log/profiler_hook.hpp b/include/ginkgo/core/log/profiler_hook.hpp index c5dc9dcbab6..fce96185365 100644 --- a/include/ginkgo/core/log/profiler_hook.hpp +++ b/include/ginkgo/core/log/profiler_hook.hpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors +// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors // // SPDX-License-Identifier: BSD-3-Clause @@ -87,6 +87,10 @@ class ProfilerHook : public Logger { void on_operation_completed(const Executor* exec, const Operation* operation) const override; + void on_synchronize_started(const Executor*) const override; + + void on_synchronize_completed(const Executor*) const override; + /* PolymorphicObject events */ void on_polymorphic_object_copy_started( const Executor* exec, const PolymorphicObject* from,