Skip to content

Commit 9d35b32

Browse files
committed
Changed naming and removed boolean var
1 parent c9e5a98 commit 9d35b32

File tree

6 files changed

+25
-26
lines changed

6 files changed

+25
-26
lines changed

source/adapters/cuda/event.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ ur_result_t ur_event_handle_t_::start() {
5555

5656
try {
5757
if (Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE || isTimestampEvent()) {
58-
UR_CHECK_ERROR(cuEventRecord(EvQueued, Queue->getProfilingStream()));
58+
UR_CHECK_ERROR(cuEventRecord(EvQueued, Queue->getHostSubmitTimeStream()));
5959
UR_CHECK_ERROR(cuEventRecord(EvStart, Stream));
6060
}
6161
} catch (ur_result_t Err) {

source/adapters/cuda/queue.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -201,9 +201,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) {
201201
UR_CHECK_ERROR(cuStreamDestroy(S));
202202
});
203203

204-
if (hQueue->IsProfStreamCreated) {
205-
UR_CHECK_ERROR(cuStreamSynchronize(hQueue->getProfilingStream()));
206-
UR_CHECK_ERROR(cuStreamDestroy(hQueue->getProfilingStream()));
204+
if (hQueue->getHostSubmitTimeStream() != CUstream{0}) {
205+
UR_CHECK_ERROR(cuStreamSynchronize(hQueue->getHostSubmitTimeStream()));
206+
UR_CHECK_ERROR(cuStreamDestroy(hQueue->getHostSubmitTimeStream()));
207207
}
208208

209209
return UR_RESULT_SUCCESS;

source/adapters/cuda/queue.hpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,9 @@ struct ur_queue_handle_t_ {
2929

3030
std::vector<native_type> ComputeStreams;
3131
std::vector<native_type> TransferStreams;
32-
// Stream used solely when profiling is enabled
33-
native_type ProfStream;
34-
bool IsProfStreamCreated{false};
35-
std::once_flag ProfStreamFlag;
32+
// Stream used for recording EvQueued. It is created only if profiling is
33+
// enabled - either for the queue or per event.
34+
native_type HostSubmitTimeStream{0};
3635
// delay_compute_ keeps track of which streams have been recently reused and
3736
// their next use should be delayed. If a stream has been recently reused it
3837
// will be skipped the next time it would be selected round-robin style. When
@@ -108,14 +107,14 @@ struct ur_queue_handle_t_ {
108107
// Function which creates the profiling stream. Called only if profiling is
109108
// enabled.
110109
void createProfilingStream() {
111-
std::call_once(ProfStreamFlag, [&]() {
112-
UR_CHECK_ERROR(
113-
cuStreamCreateWithPriority(&ProfStream, CU_STREAM_NON_BLOCKING, 0));
114-
IsProfStreamCreated = true;
110+
static std::once_flag HostSubmitTimeStreamFlag;
111+
std::call_once(HostSubmitTimeStreamFlag, [&]() {
112+
UR_CHECK_ERROR(cuStreamCreateWithPriority(&HostSubmitTimeStream,
113+
CU_STREAM_NON_BLOCKING, 0));
115114
});
116115
}
117116

118-
native_type getProfilingStream() { return ProfStream; }
117+
native_type getHostSubmitTimeStream() { return HostSubmitTimeStream; }
119118

120119
bool hasBeenSynchronized(uint32_t StreamToken) {
121120
// stream token not associated with one of the compute streams

source/adapters/hip/event.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ ur_result_t ur_event_handle_t_::start() {
5050

5151
try {
5252
if (Queue->URFlags & UR_QUEUE_FLAG_PROFILING_ENABLE || isTimestampEvent()) {
53-
UR_CHECK_ERROR(hipEventRecord(EvQueued, Queue->getProfilingStream()));
53+
UR_CHECK_ERROR(
54+
hipEventRecord(EvQueued, Queue->getHostSubmitTimeStream()));
5455
UR_CHECK_ERROR(hipEventRecord(EvStart, Stream));
5556
}
5657
} catch (ur_result_t Error) {

source/adapters/hip/queue.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -222,9 +222,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueRelease(ur_queue_handle_t hQueue) {
222222
UR_CHECK_ERROR(hipStreamDestroy(S));
223223
});
224224

225-
if (hQueue->IsProfStreamCreated) {
226-
UR_CHECK_ERROR(hipStreamSynchronize(hQueue->getProfilingStream()));
227-
UR_CHECK_ERROR(hipStreamDestroy(hQueue->getProfilingStream()));
225+
if (hQueue->getHostSubmitTimeStream() != hipStream_t{0}) {
226+
UR_CHECK_ERROR(hipStreamSynchronize(hQueue->getHostSubmitTimeStream()));
227+
UR_CHECK_ERROR(hipStreamDestroy(hQueue->getHostSubmitTimeStream()));
228228
}
229229

230230
return UR_RESULT_SUCCESS;

source/adapters/hip/queue.hpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,9 @@ struct ur_queue_handle_t_ {
2525

2626
std::vector<native_type> ComputeStreams;
2727
std::vector<native_type> TransferStreams;
28-
// Stream used solely when profiling is enabled
29-
native_type ProfStream;
30-
bool IsProfStreamCreated{false};
31-
std::once_flag ProfStreamFlag;
28+
// Stream used for recording EvQueued. It is created only if profiling is
29+
// enabled - either for the queue or per event.
30+
native_type HostSubmitTimeStream{0};
3231
// DelayCompute keeps track of which streams have been recently reused and
3332
// their next use should be delayed. If a stream has been recently reused it
3433
// will be skipped the next time it would be selected round-robin style. When
@@ -105,13 +104,13 @@ struct ur_queue_handle_t_ {
105104
// Function which creates the profiling stream. Called only if profiling is
106105
// enabled.
107106
void createProfilingStream() {
108-
std::call_once(ProfStreamFlag, [&]() {
109-
UR_CHECK_ERROR(
110-
hipStreamCreateWithFlags(&ProfStream, hipStreamNonBlocking));
111-
IsProfStreamCreated = true;
107+
static std::once_flag HostSubmitTimeStreamFlag;
108+
std::call_once(HostSubmitTimeStreamFlag, [&]() {
109+
UR_CHECK_ERROR(hipStreamCreateWithFlags(&HostSubmitTimeStream,
110+
hipStreamNonBlocking));
112111
});
113112
}
114-
native_type getProfilingStream() { return ProfStream; }
113+
native_type getHostSubmitTimeStream() { return HostSubmitTimeStream; }
115114

116115
bool hasBeenSynchronized(uint32_t StreamToken) {
117116
// stream token not associated with one of the compute streams

0 commit comments

Comments
 (0)