Skip to content

Commit daca6a3

Browse files
authored
[SYCL] Optimize TLS usage in NestedCallsTracker and tls_code_loc_t (#18787)
This PR decreases the number of TLS accesses in the `NestedCallsTracker` and `tls_code_loc_t`. The idea is to cache TLS location in the reference. As a result, we have only a single lookup for the TLS location.
1 parent d647d5f commit daca6a3

File tree

4 files changed

+69
-5
lines changed

4 files changed

+69
-5
lines changed

sycl/include/sycl/detail/common.hpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88

99
#pragma once
1010

11+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
12+
#include <sycl/exception.hpp> // for sycl::exception, sycl::errc
13+
#endif // #ifndef __INTEL_PREVIEW_BREAKING_CHANGES
1114
#include <sycl/detail/defines_elementary.hpp> // for __SYCL_ALWAYS_INLINE
1215
#include <sycl/detail/export.hpp> // for __SYCL_EXPORT
1316

@@ -139,6 +142,15 @@ class __SYCL_EXPORT tls_code_loc_t {
139142
// Used to maintain global state (GCodeLocTLS), so we do not want to copy
140143
tls_code_loc_t(const tls_code_loc_t &) = delete;
141144
tls_code_loc_t &operator=(const tls_code_loc_t &) = delete;
145+
#else
146+
tls_code_loc_t &operator=(const tls_code_loc_t &) {
147+
// Should never be called. In PREVIEW we marked it as deleted, but
148+
// before ABI breaking change we need to keep it for backward compatibility.
149+
assert(false && "tls_code_loc_t should not be copied");
150+
throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
151+
"tls_code_loc_t should not be copied");
152+
return *this;
153+
}
142154
#endif // __INTEL_PREVIEW_BREAKING_CHANGES
143155

144156
/// If the code location is set up by this instance, reset it.
@@ -152,6 +164,10 @@ class __SYCL_EXPORT tls_code_loc_t {
152164
bool isToplevel() const { return !MLocalScope; }
153165

154166
private:
167+
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
168+
// Cache the TLS location to decrease amount of TLS accesses.
169+
detail::code_location &CodeLocTLSRef;
170+
#endif // __INTEL_PREVIEW_BREAKING_CHANGES
155171
// The flag that is used to determine if the object is in a local scope or in
156172
// the top level scope.
157173
bool MLocalScope = true;

sycl/source/detail/common.cpp

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,18 @@ static thread_local detail::code_location GCodeLocTLS = {};
2424
/// check and see if code location object is available. If not, continue with
2525
/// instrumentation as needed
2626
tls_code_loc_t::tls_code_loc_t()
27+
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
28+
: CodeLocTLSRef(GCodeLocTLS),
29+
// Check TLS to see if a previously stashed code_location object is
30+
// available; if so, we are in a local scope.
31+
MLocalScope(CodeLocTLSRef.fileName() && CodeLocTLSRef.functionName())
32+
#else
2733
: // Check TLS to see if a previously stashed code_location object is
2834
// available; if so, we are in a local scope.
29-
MLocalScope(GCodeLocTLS.fileName() && GCodeLocTLS.functionName()) {}
35+
MLocalScope(GCodeLocTLS.fileName() && GCodeLocTLS.functionName())
36+
#endif // __INTEL_PREVIEW_BREAKING_CHANGES
37+
{
38+
}
3039

3140
ur_code_location_t codeLocationCallback(void *) {
3241
ur_code_location_t codeloc;
@@ -44,24 +53,45 @@ ur_code_location_t codeLocationCallback(void *) {
4453
/// location has been stashed in the TLS at a higher level. If not, we have the
4554
/// code location information that must be active for the current calling scope.
4655
tls_code_loc_t::tls_code_loc_t(const detail::code_location &CodeLoc)
56+
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
57+
: CodeLocTLSRef(GCodeLocTLS),
58+
// Check TLS to see if a previously stashed code_location object is
59+
// available; if so, then don't overwrite the previous information as we
60+
// are still in scope of the instrumented function.
61+
MLocalScope(CodeLocTLSRef.fileName() && CodeLocTLSRef.functionName()) {
62+
if (!MLocalScope)
63+
// Update the TLS information with the code_location information
64+
CodeLocTLSRef = CodeLoc;
65+
#else
4766
: // Check TLS to see if a previously stashed code_location object is
4867
// available; if so, then don't overwrite the previous information as we
4968
// are still in scope of the instrumented function.
5069
MLocalScope(GCodeLocTLS.fileName() && GCodeLocTLS.functionName()) {
5170
if (!MLocalScope)
5271
// Update the TLS information with the code_location information
5372
GCodeLocTLS = CodeLoc;
73+
#endif // __INTEL_PREVIEW_BREAKING_CHANGES
5474
}
5575

5676
/// @brief If we are the top lovel scope, reset the code location info
5777
tls_code_loc_t::~tls_code_loc_t() {
5878
// Only reset the TLS data if the top level function is going out of scope
5979
if (!MLocalScope) {
80+
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
81+
CodeLocTLSRef = {};
82+
#else
6083
GCodeLocTLS = {};
84+
#endif // __INTEL_PREVIEW_BREAKING_CHANGES
6185
}
6286
}
6387

64-
const detail::code_location &tls_code_loc_t::query() { return GCodeLocTLS; }
88+
const detail::code_location &tls_code_loc_t::query() {
89+
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
90+
return CodeLocTLSRef;
91+
#else
92+
return GCodeLocTLS;
93+
#endif // __INTEL_PREVIEW_BREAKING_CHANGES
94+
}
6595

6696
} // namespace detail
6797
} // namespace _V1

sycl/source/detail/queue_impl.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,19 @@ thread_local bool NestedCallsDetector = false;
3333
class NestedCallsTracker {
3434
public:
3535
NestedCallsTracker() {
36-
if (NestedCallsDetector)
36+
if (NestedCallsDetectorRef)
3737
throw sycl::exception(
3838
make_error_code(errc::invalid),
3939
"Calls to sycl::queue::submit cannot be nested. Command group "
4040
"function objects should use the sycl::handler API instead.");
41-
NestedCallsDetector = true;
41+
NestedCallsDetectorRef = true;
4242
}
4343

44-
~NestedCallsTracker() { NestedCallsDetector = false; }
44+
~NestedCallsTracker() { NestedCallsDetectorRef = false; }
45+
46+
private:
47+
// Cache the TLS location to decrease amount of TLS accesses.
48+
bool &NestedCallsDetectorRef = NestedCallsDetector;
4549
};
4650

4751
static std::vector<ur_event_handle_t>
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// RUN: %clangxx -fsycl -c -fno-color-diagnostics -Xclang -fdump-record-layouts %s -o %t.out | FileCheck %s
2+
// REQUIRES: linux
3+
// UNSUPPORTED: libcxx
4+
5+
// clang-format off
6+
7+
#include <sycl/detail/common.hpp>
8+
9+
void foo(sycl::detail::tls_code_loc_t) {}
10+
11+
// CHECK: 0 | class sycl::detail::tls_code_loc_t
12+
// CHECK-NEXT: 0 | _Bool MLocalScope
13+
// CHECK-NEXT: | [sizeof=1, dsize=1, align=1,
14+
// CHECK-NEXT: | nvsize=1, nvalign=1]

0 commit comments

Comments
 (0)