@@ -3,12 +3,36 @@ import GPUArrays: allowscalar, @allowscalar
3
3
4
4
# # unified memory indexing
5
5
6
- # TODO : needs to think about coherency -- otherwise this might crash since it doesn't sync
7
- # also, this optim would be relevant for CuArray<->Array memcpy as well.
6
# Flag recording whether host-side accesses are currently considered coherent.
# Starts out `true`; `set_coherency` clears it and `force_coherency` sets it
# again after synchronizing.
const coherent = Ref{Bool}(true)
7
+
8
+ # toggle coherency based on API calls
9
"""
    set_coherency(apicall)

Clear the `coherent` flag. `force_coherency` installs this function as
`CUDAdrv.apicall_hook[]`; the `apicall` argument is currently ignored, so every
invocation marks the state as non-coherent.
"""
function set_coherency(apicall)
    # TODO: whitelist
    setindex!(coherent, false)
    return nothing
end
14
+
15
"""
    force_coherency()

Make sure the `coherent` flag is set before the caller touches a unified buffer
through a raw host pointer, calling `CUDAdrv.synchronize()` first when the flag
is (or has to be assumed) cleared.

The flag is only trusted while `set_coherency` is installed as
`CUDAdrv.apicall_hook[]`. If the hook slot is empty, `set_coherency` is
installed so that later calls can skip the synchronization; if some other hook
occupies the slot it is left untouched and every call synchronizes.

Returns `nothing`.
"""
function force_coherency()
    # TODO: not on newer hardware with certain flags

    hook = CUDAdrv.apicall_hook[]
    if hook !== set_coherency
        # we didn't have our API call hook in place, all bets are off
        coherent[] = false

        if hook === nothing
            # nobody else is hooking for CUDA API calls, so we can safely
            # install ours. This must not be tied to the `coherent[]` check
            # below: whenever the hook is missing the flag has just been
            # cleared, so an `elseif` on that check could never be reached and
            # the hook would never get installed.
            CUDAdrv.apicall_hook[] = set_coherency
        end
    end

    if !coherent[]
        CUDAdrv.synchronize()
        # set the flag only after synchronizing: if the synchronize call itself
        # triggers the hook, the flag is cleared again during the call.
        coherent[] = true
    end

    return nothing
end
8
31
9
32
function GPUArrays. _getindex (xs:: CuArray{T} , i:: Integer ) where T
10
33
buf = buffer (xs)
11
34
if isa (buf, Mem. UnifiedBuffer)
35
+ force_coherency ()
12
36
ptr = convert (Ptr{T}, buffer (xs))
13
37
unsafe_load (ptr, i)
14
38
else
21
45
function GPUArrays. _setindex! (xs:: CuArray{T} , v:: T , i:: Integer ) where T
22
46
buf = buffer (xs)
23
47
if isa (buf, Mem. UnifiedBuffer)
48
+ force_coherency ()
24
49
ptr = convert (Ptr{T}, buffer (xs))
25
50
unsafe_store! (ptr, v, i)
26
51
else
0 commit comments