@@ -38,14 +38,8 @@ function HIPBuffer(bytesize; stream::HIP.HIPStream)
38
38
ptr = alloc_or_retry! (isnothing; stream) do
39
39
try
40
40
# Try to allocate.
41
- # NOTE Async is ~300x slower for small (≤ 16 bytes) allocations:
42
- # https://github.com/ROCm/HIP/issues/3370#issuecomment-1842938966
43
- if bytesize > 16
44
- HIP. hipMallocAsync (ptr_ref, bytesize, stream) |> HIP. check
45
- # HIP.hipMallocFromPoolAsync(ptr_ref, bytesize, pool, stream) |> HIP.check
46
- else
47
- HIP. hipMalloc (ptr_ref, bytesize) |> HIP. check
48
- end
41
+ HIP. hipMallocAsync (ptr_ref, bytesize, stream) |> HIP. check
42
+ # HIP.hipMallocFromPoolAsync(ptr_ref, bytesize, pool, stream) |> HIP.check
49
43
50
44
ptr = ptr_ref[]
51
45
ptr == C_NULL && throw (HIP. HIPError (HIP. hipErrorOutOfMemory))
@@ -78,11 +72,7 @@ function free(buf::HIPBuffer; stream::HIP.HIPStream)
78
72
buf. own || return
79
73
80
74
buf. ptr == C_NULL && return
81
- if buf. bytesize > 16
82
- HIP. hipFreeAsync (buf, stream) |> HIP. check
83
- else
84
- HIP. hipFree (buf) |> HIP. check
85
- end
75
+ HIP. hipFreeAsync (buf, stream) |> HIP. check
86
76
AMDGPU. account! (AMDGPU. memory_stats (buf. device), - buf. bytesize)
87
77
return
88
78
end
@@ -93,13 +83,9 @@ function upload!(dst::HIPBuffer, src::Ptr, bytesize::Int; stream::HIP.HIPStream)
93
83
return
94
84
end
95
85
96
- function download! (dst:: Ptr , src:: HIPBuffer , bytesize:: Int ; stream:: HIP.HIPStream , async :: Bool )
86
+ function download! (dst:: Ptr , src:: HIPBuffer , bytesize:: Int ; stream:: HIP.HIPStream )
97
87
bytesize == 0 && return
98
- if async
99
- HIP. hipMemcpyDtoHAsync (dst, src, bytesize, stream) |> HIP. check
100
- else
101
- HIP. hipMemcpyDtoH (dst, src, bytesize) |> HIP. check
102
- end
88
+ HIP. hipMemcpyDtoHAsync (dst, src, bytesize, stream) |> HIP. check
103
89
return
104
90
end
105
91
@@ -157,10 +143,10 @@ upload!(dst::HostBuffer, src::Ptr, sz::Int; stream::HIP.HIPStream) =
157
143
upload! (dst:: HostBuffer , src:: HIPBuffer , sz:: Int ; stream:: HIP.HIPStream ) =
158
144
HIP. memcpy (dst, src, sz, HIP. hipMemcpyDeviceToHost, stream)
159
145
160
- download! (dst:: Ptr , src:: HostBuffer , sz:: Int ; stream:: HIP.HIPStream , async :: Bool ) =
146
+ download! (dst:: Ptr , src:: HostBuffer , sz:: Int ; stream:: HIP.HIPStream ) =
161
147
HIP. memcpy (dst, src, sz, HIP. hipMemcpyHostToHost, stream)
162
148
163
- download! (dst:: HIPBuffer , src:: HostBuffer , sz:: Int ; stream:: HIP.HIPStream , async :: Bool ) =
149
+ download! (dst:: HIPBuffer , src:: HostBuffer , sz:: Int ; stream:: HIP.HIPStream ) =
164
150
HIP. memcpy (dst, src, sz, HIP. hipMemcpyHostToDevice, stream)
165
151
166
152
transfer! (dst:: HostBuffer , src:: HostBuffer , sz:: Int ; stream:: HIP.HIPStream ) =
0 commit comments