Deprecate barrier isolation macros (#4357)

q10 · facebook-github-bot · commit 166352890aa1 · 2025-06-23T12:13:47.000-07:00
Summary: Pull Request resolved: #4357 X-link: facebookresearch/FBGEMM#1424 - Deprecate barrier isolation macros since they have been replaced by `FBGEMM_LAUNCH_KERNEL` Reviewed By: spcyppt Differential Revision: D76700671 fbshipit-source-id: 41ba94737225f108d45d701c5b3298d5423a4cef
diff --git a/fbgemm_gpu/codegen/training/backward/embedding_backward_split_template.cu b/fbgemm_gpu/codegen/training/backward/embedding_backward_split_template.cu
@@ -24,7 +24,6 @@
 #include "fbgemm_gpu/sparse_ops.h"
 #include "fbgemm_gpu/config/feature_gates.h"
 #include "fbgemm_gpu/split_embeddings_utils.cuh"
-#include "fbgemm_gpu/utils/barrier_isolation.cuh"
 #include "fbgemm_gpu/utils/kernel_launcher.cuh"
 #include "fbgemm_gpu/utils/ops_utils.h"
 #include "fbgemm_gpu/utils/tensor_accessor_builder.h"
diff --git a/fbgemm_gpu/include/fbgemm_gpu/utils/barrier_isolation.cuh b/fbgemm_gpu/include/fbgemm_gpu/utils/barrier_isolation.cuh
diff --git a/fbgemm_gpu/include/fbgemm_gpu/utils/kernel_launcher.cuh b/fbgemm_gpu/include/fbgemm_gpu/utils/kernel_launcher.cuh
@@ -395,7 +395,7 @@ struct KernelLauncher {
 } // namespace fbgemm_gpu::utils
 
 ////////////////////////////////////////////////////////////////////////////////
-// Macro create a compile-time concatenation of __TEMPLATE_SOURCE_FILE__ and
+// Macro to create a compile-time concatenation of __TEMPLATE_SOURCE_FILE__ and
 // __FILE__
 //
 // This is used for reporting the template filename into to Torch DSA.  Runtime
@@ -412,25 +412,8 @@ struct KernelLauncher {
 #endif
 
 ////////////////////////////////////////////////////////////////////////////////
-// General Kernel Launch Macros for FBGEMM GPU Kernels
-//
-// This macro is used to launch GPU kernels in FBGEMM GPU codebase. It runs a
-// set of constraint checks on kernel parameters and and tensor arguments, and
-// throws descriptive errors on constraint failures.
-//
-// NOTES:
-//
-//  - Since the code is wrapped inside an immediately-invoked lambda,
-//  source_location::current() will point to the function where the macro is
-//  called.
-//
-//  - The constexpr decltype(KERNEL) declaration is added to enable for better
-//  compilation error messages upon template argument and function overload
-//  mismatches.
-//
-//  - The macro expression is wrapped inside a parenthesis to avoid commas from
-//  interfering with preoprocessing when this macro is invoked inside another
-//  macro.
+// Macro to define _FKL_TFILE_ to be __TEMPLATE_SOURCE_FILE__ if it is defined,
+// else the empty string
 ////////////////////////////////////////////////////////////////////////////////
 
 #ifdef __TEMPLATE_SOURCE_FILE__
@@ -439,12 +422,31 @@ struct KernelLauncher {
 #define _FKL_TFILE_ ""
 #endif
 
+////////////////////////////////////////////////////////////////////////////////
+// Enable Kernel Barrier Isolation
+//
+// When this flag is defined, a kernel's execution is isolated from other GPU
+// processes that might otherwise have been running concurrently.  This acts as
+// a performance profiling tool, used in conjunction with trace inspection, to
+// determine whether a kernel's regression is caused by other GPU processes
+// competing for memory bandwidth, which can be especially relevant when the
+// data accessed by the kernel is in UVM.
+////////////////////////////////////////////////////////////////////////////////
+
 #ifdef FBGEMM_GPU_ISOLATE_KERNEL_LAUNCH
 #define _FKL_BLOCKING_ true
 #else
 #define _FKL_BLOCKING_ false
 #endif
 
+////////////////////////////////////////////////////////////////////////////////
+// Enable Tensor Value Checks
+//
+// When defined, tensors that are passed into the kernel launcher via TA_B() or
+// PTA_B() will be checked for NaN and Inf values.  This is an expensive check
+// and is meant to be used for debugging.
+////////////////////////////////////////////////////////////////////////////////
+
 #ifdef FBGEMM_GPU_TENSORCHECK
 #define _FKL_TENSORCHECK_ true
 #else
@@ -473,8 +475,22 @@ struct KernelLauncher {
 ////////////////////////////////////////////////////////////////////////////////
 // Kernel Launcher Macros for FBGEMM GPU Kernels
 //
-// This macro simplifies the kernel launch process by wrapping the kernel
-// launches into simple-to-use macros.
+// This macro simplifies the construction and execution of KernelLauncher
+// instances by wrapping the kernel launches into simple-to-use macros.
+//
+// NOTES:
+//
+//  - Since the code is wrapped inside an immediately-invoked lambda,
+//  source_location::current() will point to the function where the macro is
+//  called.
+//
+//  - The constexpr decltype(KERNEL) declaration is added to enable better
+//  compilation error messages upon template argument and function overload
+//  mismatches.
+//
+//  - The macro expression is wrapped inside parentheses to avoid commas from
+//  interfering with preprocessing when this macro is invoked inside another
+//  macro.
 ////////////////////////////////////////////////////////////////////////////////
 
 #define FBGEMM_LAUNCH_KERNEL(KERNEL, GRID, BLOCK, SMEM, STREAM, ...)        \