@@ -66,11 +66,16 @@ function Random.rand!(rng::RNG, A::AnyCuArray)
66
66
return
67
67
end
68
68
69
- kernel = @cuda launch= false name= " rand!" kernel (A, rng. seed, rng. counter)
70
- config = launch_configuration (kernel. fun; max_threads= 64 )
71
- threads = max (32 , min (config. threads, length (A)))
72
- blocks = min (config. blocks, cld (length (A), threads))
73
- kernel (A, rng. seed, rng. counter; threads, blocks)
69
+ # XXX : because of how random numbers are generated, the launch configuration
70
+ # affects the results. as such, use a constant number of threads, set
71
+ # very low for compatibility, and a deterministic number of blocks.
72
+ # this is not ideal, but otherwise generated numbers have been observed to
73
+ # be different between otherwise identical inputs (eltype, dims)
74
+ # depending on whether it was a direct CuArray or a wrapped SubArray.
75
+ threads = 32
76
+ blocks = cld (length (A), threads)
77
+
78
+ @cuda threads= threads blocks= blocks name= " rand!" kernel (A, rng. seed, rng. counter)
74
79
75
80
new_counter = Int64 (rng. counter) + length (A)
76
81
overflow, remainder = fldmod (new_counter, typemax (UInt32))
0 commit comments