Skip to content

Commit 8b04a4e

Browse files
Bug fix: GPU events were created on the wrong device in multi-GPU scenarios. (#3910)
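Previously, GPU events were created without first setting the active device, so in a multi-GPU setup they could be created on the wrong device and fail. The device is now set before a context's streams and events are created, and the HIP error messages now include the HIP error string.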
1 parent 486cdfb commit 8b04a4e

File tree

2 files changed

+17
-14
lines changed

2 files changed

+17
-14
lines changed

src/targets/gpu/hip.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
/*
22
* The MIT License (MIT)
33
*
4-
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
4+
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
55
*
66
* Permission is hereby granted, free of charge, to any person obtaining a copy
77
* of this software and associated documentation files (the "Software"), to deal
@@ -237,7 +237,7 @@ void set_device(std::size_t id)
237237
{
238238
auto status = hipSetDevice(id);
239239
if(status != hipSuccess)
240-
MIGRAPHX_THROW("Error setting device");
240+
MIGRAPHX_THROW("Error setting device: " + hip_error(status));
241241
}
242242

243243
void gpu_sync()

src/targets/gpu/include/migraphx/gpu/context.hpp

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,10 @@ struct hip_device
5959
{
6060
auto status = hipGetDeviceProperties(&device_props, device_id);
6161
if(status != hipSuccess)
62-
MIGRAPHX_THROW("Failed to allocate stream");
62+
MIGRAPHX_THROW("Failed to get device properties: " + hip_error(status));
63+
64+
// Set the device prior to Events that get created within a Context.
65+
set_device(device_id);
6366

6467
for(std::size_t i = 0; i < n; i++)
6568
add_stream();
@@ -80,7 +83,7 @@ struct hip_device
8083
hipStream_t result = nullptr;
8184
auto status = hipStreamCreateWithFlags(&result, hipStreamNonBlocking);
8285
if(status != hipSuccess)
83-
MIGRAPHX_THROW("Failed to allocate stream");
86+
MIGRAPHX_THROW("Failed to allocate stream: " + hip_error(status));
8487
return hip_stream_ptr{result};
8588
}
8689

@@ -147,23 +150,23 @@ struct hip_device
147150
setup();
148151
auto status = hipStreamSynchronize(s.get());
149152
if(status != hipSuccess)
150-
MIGRAPHX_THROW("Failed to wait.");
153+
MIGRAPHX_THROW("Failed to wait: " + hip_error(status));
151154
}
152155

153156
void wait(hipEvent_t event)
154157
{
155158
setup();
156159
auto status = hipStreamWaitEvent(get(), event, 0);
157160
if(status != hipSuccess)
158-
MIGRAPHX_THROW("Failed to wait.");
161+
MIGRAPHX_THROW("Failed to wait: " + hip_error(status));
159162
}
160163

161164
void record(hipEvent_t event)
162165
{
163166
setup();
164167
auto status = hipEventRecord(event, get());
165168
if(status != hipSuccess)
166-
MIGRAPHX_THROW("Failed to record.");
169+
MIGRAPHX_THROW("Failed to record: " + hip_error(status));
167170
}
168171

169172
private:
@@ -293,7 +296,7 @@ struct context
293296
hipEvent_t event;
294297
auto status = hipEventCreateWithFlags(&event, hipEventDisableTiming);
295298
if(status != hipSuccess)
296-
MIGRAPHX_THROW("Failed to create event");
299+
MIGRAPHX_THROW("Failed to create event: " + hip_error(status));
297300
return hip_event_ptr{event};
298301
}
299302

@@ -302,7 +305,7 @@ struct context
302305
hipEvent_t event;
303306
auto status = hipEventCreate(&event);
304307
if(status != hipSuccess)
305-
MIGRAPHX_THROW("Failed to create event");
308+
MIGRAPHX_THROW("Failed to create event: " + hip_error(status));
306309
return hip_event_ptr{event};
307310
}
308311

@@ -332,7 +335,7 @@ struct context
332335
{
333336
auto status = hipEventRecord(begin_event.get(), queue.get<hipStream_t>());
334337
if(status != hipSuccess)
335-
MIGRAPHX_THROW("failed to record " + hip_error(status));
338+
MIGRAPHX_THROW("Failed to record: " + hip_error(status));
336339

337340
get_stream().wait(begin_event.get());
338341
}
@@ -343,7 +346,7 @@ struct context
343346

344347
auto status = hipStreamWaitEvent(queue.get<hipStream_t>(), finish_event.get(), 0);
345348
if(status != hipSuccess)
346-
MIGRAPHX_THROW("Failed to wait on event " + hip_error(status));
349+
MIGRAPHX_THROW("Failed to wait on event: " + hip_error(status));
347350
}
348351

349352
any_ptr get_queue() { return get_stream().get(); }
@@ -383,9 +386,9 @@ struct context
383386
// for event perf timing
384387
shared<hip_event_ptr> start_event = nullptr;
385388
shared<hip_event_ptr> stop_event = nullptr;
386-
// for stream syncronization
387-
shared<hip_event_ptr> begin_event = nullptr;
388-
shared<hip_event_ptr> finish_event = nullptr;
389+
// for stream synchronization
390+
shared<hip_event_ptr> begin_event = nullptr;
391+
shared<hip_event_ptr> finish_event = nullptr;
389392
std::shared_ptr<auto_save_problem_cache> pc = nullptr;
390393
};
391394

0 commit comments

Comments
 (0)