Skip to content

Commit cd524f9

Browse files
authored
Merge branch 'main' into num_compute_units
2 parents a8d7b49 + ce87652 commit cd524f9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+1740
-943
lines changed

.github/workflows/build-hw-reusable.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ jobs:
112112

113113
- name: Test adapter specific
114114
working-directory: ${{github.workspace}}/build
115-
run: ctest -C ${{matrix.build_type}} --output-on-failure -L "adapter-specific" --timeout 180
115+
run: ctest -C ${{matrix.build_type}} --output-on-failure -L "adapter-specific" -E "memcheck" --timeout 180
116116
# Don't run adapter specific tests when building multiple adapters
117117
if: ${{ matrix.adapter.other_name == '' }}
118118

cmake/helpers.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ set(CFI_FLAGS "")
8383
if (CFI_HAS_CFI_SANITIZE)
8484
# cfi-icall requires called functions in shared libraries to also be built with cfi-icall, which we can't
8585
# guarantee. -fsanitize=cfi depends on -flto
86-
set(CFI_FLAGS "-flto -fsanitize=cfi -fno-sanitize=cfi-icall -fsanitize-ignorelist=${CMAKE_SOURCE_DIR}/sanitizer-ignorelist.txt")
86+
set(CFI_FLAGS "-flto -fsanitize=cfi -fno-sanitize=cfi-icall -fsanitize-ignorelist=${PROJECT_SOURCE_DIR}/sanitizer-ignorelist.txt")
8787
endif()
8888

8989
function(add_ur_target_compile_options name)

examples/collector/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ add_ur_library(${TARGET_NAME} SHARED
1010
)
1111

1212
target_include_directories(${TARGET_NAME} PRIVATE
13-
${CMAKE_SOURCE_DIR}/include
13+
${PROJECT_SOURCE_DIR}/include
1414
)
1515

1616
target_link_libraries(${TARGET_NAME} PRIVATE ${TARGET_XPTI})

include/ur_api.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3555,10 +3555,10 @@ typedef struct ur_usm_pool_limits_desc_t {
35553555
/// + If ::UR_DEVICE_INFO_USM_HOST_SUPPORT is false.
35563556
/// - ::UR_RESULT_ERROR_INVALID_VALUE
35573557
/// + `pUSMDesc && pUSMDesc->align != 0 && ((pUSMDesc->align & (pUSMDesc->align-1)) != 0)`
3558-
/// + If `align` is greater that the size of the largest data type supported by `hDevice`.
3558+
/// + If `align` is greater than the size of the largest data type supported by any device in `hContext`.
35593559
/// - ::UR_RESULT_ERROR_INVALID_USM_SIZE
35603560
/// + `size == 0`
3561-
/// + `size` is greater than ::UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE.
3561+
/// + `size` is greater than ::UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE for any device in `hContext`
35623562
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
35633563
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
35643564
/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE

scripts/benchmarks/benches/compute.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,9 @@ def benchmarks(self) -> list[Benchmark]:
7878

7979
if options.ur is not None:
8080
benches += [
81-
SubmitKernelUR(self, 0),
82-
SubmitKernelUR(self, 1),
81+
SubmitKernelUR(self, 0, 0),
82+
SubmitKernelUR(self, 1, 0),
83+
SubmitKernelUR(self, 1, 1),
8384
]
8485

8586
return benches
@@ -180,13 +181,14 @@ def bin_args(self) -> list[str]:
180181
]
181182

182183
class SubmitKernelUR(ComputeBenchmark):
183-
def __init__(self, bench, ioq):
184+
def __init__(self, bench, ioq, measureCompletion):
184185
self.ioq = ioq
186+
self.measureCompletion = measureCompletion
185187
super().__init__(bench, "api_overhead_benchmark_ur", "SubmitKernel")
186188

187189
def name(self):
188190
order = "in order" if self.ioq else "out of order"
189-
return f"api_overhead_benchmark_ur SubmitKernel {order}"
191+
return f"api_overhead_benchmark_ur SubmitKernel {order}" + (" with measure completion" if self.measureCompletion else "")
190192

191193
def explicit_group(self):
192194
return "SubmitKernel"
@@ -195,7 +197,7 @@ def bin_args(self) -> list[str]:
195197
return [
196198
f"--Ioq={self.ioq}",
197199
"--DiscardEvents=0",
198-
"--MeasureCompletion=0",
200+
f"--MeasureCompletion={self.measureCompletion}",
199201
"--iterations=100000",
200202
"--Profiling=0",
201203
"--NumKernels=10",

scripts/benchmarks/benches/options.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class Options:
2626
# these two should probably be merged into one setting
2727
stddev_threshold: float = 0.02
2828
epsilon: float = 0.02
29+
iterations_stddev: int = 5
2930

3031
options = Options()
3132

scripts/benchmarks/main.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
160160
merged_env_vars = {**additional_env_vars}
161161
intermediate_results: dict[str, list[Result]] = {}
162162
processed: list[Result] = []
163-
for _ in range(5):
163+
for _ in range(options.iterations_stddev):
164164
run_iterations(benchmark, merged_env_vars, options.iterations, intermediate_results)
165165
valid, processed = process_results(intermediate_results, benchmark.stddev_threshold())
166166
if valid:
@@ -252,6 +252,12 @@ def validate_and_parse_env_args(env_args):
252252
parser.add_argument("--output-html", help='Create HTML output', action="store_true", default=False)
253253
parser.add_argument("--output-markdown", help='Create Markdown output', action="store_true", default=True)
254254
parser.add_argument("--dry-run", help='Do not run any actual benchmarks', action="store_true", default=False)
255+
parser.add_argument(
256+
"--iterations-stddev",
257+
type=int,
258+
help="Max number of iterations of the loop calculating stddev after completed benchmark runs",
259+
default=options.iterations_stddev,
260+
)
255261

256262
args = parser.parse_args()
257263
additional_env_vars = validate_and_parse_env_args(args.env)
@@ -272,6 +278,7 @@ def validate_and_parse_env_args(env_args):
272278
options.output_markdown = args.output_markdown
273279
options.dry_run = args.dry_run
274280
options.umf = args.umf
281+
options.iterations_stddev = args.iterations_stddev
275282

276283
benchmark_filter = re.compile(args.filter) if args.filter else None
277284

scripts/core/usm.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,10 +257,10 @@ returns:
257257
- "If $X_DEVICE_INFO_USM_HOST_SUPPORT is false."
258258
- $X_RESULT_ERROR_INVALID_VALUE:
259259
- "`pUSMDesc && pUSMDesc->align != 0 && ((pUSMDesc->align & (pUSMDesc->align-1)) != 0)`" # alignment must be power of two
260-
- "If `align` is greater that the size of the largest data type supported by `hDevice`."
260+
- "If `align` is greater than the size of the largest data type supported by any device in `hContext`."
261261
- $X_RESULT_ERROR_INVALID_USM_SIZE:
262262
- "`size == 0`"
263-
- "`size` is greater than $X_DEVICE_INFO_MAX_MEM_ALLOC_SIZE."
263+
- "`size` is greater than $X_DEVICE_INFO_MAX_MEM_ALLOC_SIZE for any device in `hContext`"
264264
- $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
265265
- $X_RESULT_ERROR_OUT_OF_RESOURCES
266266
- $X_RESULT_ERROR_UNSUPPORTED_FEATURE:

scripts/templates/helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1130,7 +1130,7 @@ def make_param_checks(namespace, tags, obj, cpp=False, meta=None):
11301130
for key, values in item.items():
11311131
key = subt(namespace, tags, key, False, cpp)
11321132
for val in values:
1133-
code = re.match(r"^\`(.*)\`$", val)
1133+
code = re.match(r"^\`([^`]*)\`$", val)
11341134
if code:
11351135
if key not in checks:
11361136
checks[key] = []

scripts/templates/ldrddi.cpp.mako

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,37 @@ from templates import helper as th
2424
namespace ur_loader
2525
{
2626
%for obj in th.get_adapter_functions(specs):
27+
<%
28+
func_name = th.make_func_name(n, tags, obj)
29+
if func_name.startswith(x):
30+
func_basename = func_name[len(x):]
31+
else:
32+
func_basename = func_name
33+
%>
34+
%if func_basename == "EventSetCallback":
35+
namespace {
36+
struct event_callback_wrapper_data_t {
37+
${x}_event_callback_t fn;
38+
${x}_event_handle_t event;
39+
void *userData;
40+
};
41+
42+
void event_callback_wrapper([[maybe_unused]] ${x}_event_handle_t hEvent,
43+
${x}_execution_info_t execStatus, void *pUserData) {
44+
auto *wrapper =
45+
reinterpret_cast<event_callback_wrapper_data_t *>(pUserData);
46+
(wrapper->fn)(wrapper->event, execStatus, wrapper->userData);
47+
delete wrapper;
48+
}
49+
}
50+
51+
%endif
2752
///////////////////////////////////////////////////////////////////////////////
28-
/// @brief Intercept function for ${th.make_func_name(n, tags, obj)}
53+
/// @brief Intercept function for ${func_name}
2954
%if 'condition' in obj:
3055
#if ${th.subt(n, tags, obj['condition'])}
3156
%endif
32-
__${x}dlllocal ${x}_result_t ${X}_APICALL
33-
${th.make_func_name(n, tags, obj)}(
57+
__${x}dlllocal ${x}_result_t ${X}_APICALL ${func_name}(
3458
%for line in th.make_param_lines(n, tags, obj):
3559
${line}
3660
%endfor
@@ -41,7 +65,7 @@ namespace ur_loader
4165
%>${th.get_initial_null_set(obj)}
4266

4367
[[maybe_unused]] auto context = getContext();
44-
%if re.match(r"\w+AdapterGet$", th.make_func_name(n, tags, obj)):
68+
%if func_basename == "AdapterGet":
4569

4670
size_t adapterIndex = 0;
4771
if( nullptr != ${obj['params'][1]['name']} && ${obj['params'][0]['name']} !=0)
@@ -74,7 +98,7 @@ namespace ur_loader
7498
*${obj['params'][2]['name']} = static_cast<uint32_t>(context->platforms.size());
7599
}
76100

77-
%elif re.match(r"\w+PlatformGet$", th.make_func_name(n, tags, obj)):
101+
%elif func_basename == "PlatformGet":
78102
uint32_t total_platform_handle_count = 0;
79103

80104
for( uint32_t adapter_index = 0; adapter_index < ${obj['params'][1]['name']}; adapter_index++)
@@ -132,6 +156,16 @@ namespace ur_loader
132156
<%break%>
133157
%endif
134158
%endfor
159+
%if func_basename == "EventSetCallback":
160+
161+
// Replace the callback with a wrapper function that gives the callback the loader event rather than a
162+
// backend-specific event
163+
auto *wrapper_data =
164+
new event_callback_wrapper_data_t{pfnNotify, hEvent, pUserData};
165+
pUserData = wrapper_data;
166+
pfnNotify = event_callback_wrapper;
167+
168+
%endif
135169
%for i, item in enumerate(th.get_loader_prologue(n, tags, obj, meta)):
136170
%if 'range' in item:
137171
<%
@@ -263,7 +297,7 @@ namespace ur_loader
263297
%for i, item in enumerate(epilogue):
264298
%if 0 == i and not item['release'] and not item['retain'] and not th.always_wrap_outputs(obj):
265299
## TODO: Remove once we have a concrete way for submitting warnings in place.
266-
%if re.match(r"urEnqueue\w+", th.make_func_name(n, tags, obj)):
300+
%if re.match(r"Enqueue\w+", func_basename):
267301
// In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any output handles below.
268302
if( ${X}_RESULT_SUCCESS != result && ${X}_RESULT_ERROR_ADAPTER_SPECIFIC != result )
269303
return result;

0 commit comments

Comments
 (0)