Skip to content

Commit 1663528

Browse files
q10facebook-github-bot
authored and committed
Deprecate barrier isolation macros (#4357)
Summary: Pull Request resolved: #4357 X-link: facebookresearch/FBGEMM#1424 - Deprecate barrier isolation macros since they have been replaced by `FBGEMM_LAUNCH_KERNEL` Reviewed By: spcyppt Differential Revision: D76700671 fbshipit-source-id: 41ba94737225f108d45d701c5b3298d5423a4cef
1 parent e368332 commit 1663528

File tree

3 files changed

+38
-63
lines changed

3 files changed

+38
-63
lines changed

fbgemm_gpu/codegen/training/backward/embedding_backward_split_template.cu

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
#include "fbgemm_gpu/sparse_ops.h"
2525
#include "fbgemm_gpu/config/feature_gates.h"
2626
#include "fbgemm_gpu/split_embeddings_utils.cuh"
27-
#include "fbgemm_gpu/utils/barrier_isolation.cuh"
2827
#include "fbgemm_gpu/utils/kernel_launcher.cuh"
2928
#include "fbgemm_gpu/utils/ops_utils.h"
3029
#include "fbgemm_gpu/utils/tensor_accessor_builder.h"

fbgemm_gpu/include/fbgemm_gpu/utils/barrier_isolation.cuh

Lines changed: 0 additions & 40 deletions
This file was deleted.

fbgemm_gpu/include/fbgemm_gpu/utils/kernel_launcher.cuh

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ struct KernelLauncher {
395395
} // namespace fbgemm_gpu::utils
396396

397397
////////////////////////////////////////////////////////////////////////////////
398-
// Macro create a compile-time concatenation of __TEMPLATE_SOURCE_FILE__ and
398+
// Macro to create a compile-time concatenation of __TEMPLATE_SOURCE_FILE__ and
399399
// __FILE__
400400
//
401401
// This is used for reporting the template filename to Torch DSA. Runtime
@@ -412,25 +412,8 @@ struct KernelLauncher {
412412
#endif
413413

414414
////////////////////////////////////////////////////////////////////////////////
415-
// General Kernel Launch Macros for FBGEMM GPU Kernels
416-
//
417-
// This macro is used to launch GPU kernels in FBGEMM GPU codebase. It runs a
418-
// set of constraint checks on kernel parameters and tensor arguments, and
419-
// throws descriptive errors on constraint failures.
420-
//
421-
// NOTES:
422-
//
423-
// - Since the code is wrapped inside an immediately-invoked lambda,
424-
// source_location::current() will point to the function where the macro is
425-
// called.
426-
//
427-
// - The constexpr decltype(KERNEL) declaration is added to enable better
428-
// compilation error messages upon template argument and function overload
429-
// mismatches.
430-
//
431-
// - The macro expression is wrapped inside parentheses to avoid commas from
432-
// interfering with preprocessing when this macro is invoked inside another
433-
// macro.
415+
// Macro to define _FKL_TFILE_ to be __TEMPLATE_SOURCE_FILE__ if it is defined,
416+
// else empty string
434417
////////////////////////////////////////////////////////////////////////////////
435418

436419
#ifdef __TEMPLATE_SOURCE_FILE__
@@ -439,12 +422,31 @@ struct KernelLauncher {
439422
#define _FKL_TFILE_ ""
440423
#endif
441424

425+
////////////////////////////////////////////////////////////////////////////////
426+
// Enable Kernel Barrier Isolation
427+
//
428+
// When this flag is defined, a kernel's execution is isolated from other GPU
429+
// processes that might otherwise have been running concurrently. This acts as
430+
// a performance profiling tool used in conjunction with trace inspection to
431+
// determine whether a kernel's regression might be due to other GPU processes
432+
// competing for memory bandwidth and slowing the kernel down, which can
433+
// be especially relevant when data accessed by the kernel is in UVM.
434+
////////////////////////////////////////////////////////////////////////////////
435+
442436
#ifdef FBGEMM_GPU_ISOLATE_KERNEL_LAUNCH
443437
#define _FKL_BLOCKING_ true
444438
#else
445439
#define _FKL_BLOCKING_ false
446440
#endif
447441

442+
////////////////////////////////////////////////////////////////////////////////
443+
// Enable Tensor Value Checks
444+
//
445+
// When defined, tensors that are passed into the kernel launcher via TA_B() or
446+
// PTA_B() will be checked for NaN and Inf values. This is an expensive check
447+
// and is meant to be used for debugging.
448+
////////////////////////////////////////////////////////////////////////////////
449+
448450
#ifdef FBGEMM_GPU_TENSORCHECK
449451
#define _FKL_TENSORCHECK_ true
450452
#else
@@ -473,8 +475,22 @@ struct KernelLauncher {
473475
////////////////////////////////////////////////////////////////////////////////
474476
// Kernel Launcher Macros for FBGEMM GPU Kernels
475477
//
476-
// This macro simplifies the kernel launch process by wrapping the kernel
477-
// launches into simple-to-use macros.
478+
// This macro simplifies the construction and execution of KernelLauncher
479+
// instances by wrapping the kernel launches into simple-to-use macros.
480+
//
481+
// NOTES:
482+
//
483+
// - Since the code is wrapped inside an immediately-invoked lambda,
484+
// source_location::current() will point to the function where the macro is
485+
// called.
486+
//
487+
// - The constexpr decltype(KERNEL) declaration is added to enable better
488+
// compilation error messages upon template argument and function overload
489+
// mismatches.
490+
//
491+
// - The macro expression is wrapped inside parentheses to avoid commas from
492+
// interfering with preprocessing when this macro is invoked inside another
493+
// macro.
478494
////////////////////////////////////////////////////////////////////////////////
479495

480496
#define FBGEMM_LAUNCH_KERNEL(KERNEL, GRID, BLOCK, SMEM, STREAM, ...) \

0 commit comments

Comments
 (0)