@@ -134,17 +134,15 @@ Base.collect(X::AnyGPUArray) = collect_to_cpu(X)
134
134
135
135
# memory copying
136
136
137
+ # expects the GPU array type to have linear `copyto!` methods (i.e. accepting an integer
138
+ # offset and length) from and to CPU arrays and between GPU arrays.
139
+
137
140
# Copy `src` into `dst`, requiring both vectors to have identical axes.
# Throws an `ArgumentError` when the axes differ; otherwise the work is handed to
# `copyto!(dst, src)` and its result is returned.
function Base.copy!(dst::AbstractGPUVector, src::AbstractGPUVector)
    if axes(dst) != axes(src)
        msg = " arrays must have the same axes for `copy!`. consider using `copyto!` instead"
        throw(ArgumentError(msg))
    end
    return copyto!(dst, src)
end
142
145
143
- # # basic linear copies of identically-typed memory
144
-
145
- # expects the GPU array type to have linear `copyto!` methods (i.e. accepting an integer
146
- # offset and length) from and to CPU arrays and between GPU arrays.
147
-
148
146
for (D, S) in ((AnyGPUArray, Array),
149
147
(Array, AnyGPUArray),
150
148
(AnyGPUArray, AnyGPUArray))
@@ -156,18 +154,6 @@ for (D, S) in ((AnyGPUArray, Array),
156
154
copyto! (dest, drange, src, srange)
157
155
end
158
156
159
# One-dimensional Cartesian-range `copyto!`, specialised on `$D`/`$S`.
# NOTE(review): `$D` and `$S` look like interpolations of the (D, S) pairs of an enclosing
# `for (D, S) in ...` loop — confirm against the full file.
# Behaviour shown by the code below:
#   * throws `ArgumentError` when `d_range` and `s_range` differ in length;
#   * returns `dest` untouched when the ranges are empty;
#   * otherwise forwards to the linear `copyto!(dest, d_offset, src, s_offset, len)`,
#     where each offset is the first index of the corresponding range.
# Every line of this method carries a `-` marker in this diff view, i.e. it is being
# removed; an equivalent `N == 1` fast path appears among the `+` lines of the
# N-dimensional Cartesian `copyto!` methods later in the same diff.
- function Base. copyto! (dest:: $D , d_range:: CartesianIndices{1} ,
160
- src:: $S , s_range:: CartesianIndices{1} )
161
- len = length (d_range)
162
- if length (s_range) != len
163
- throw (ArgumentError (" Copy range needs same length. Found: dest: $len , src: $(length (s_range)) " ))
164
- end
165
# nothing to copy for empty ranges
- len == 0 && return dest
166
# a 1-d CartesianIndex has a single component; `[1]` extracts it as the linear offset
- d_offset = first (d_range)[1 ]
167
- s_offset = first (s_range)[1 ]
168
- copyto! (dest, d_offset, src, s_offset, len)
169
- end
170
-
171
157
# Whole-array copy: forward to the linear five-argument `copyto!`, starting at
# element 1 of both arrays and transferring `length(src)` elements.
function Base.copyto!(dest::$D, src::$S)
    return copyto!(dest, 1, src, 1, length(src))
end
172
158
end
173
159
end
@@ -260,6 +246,13 @@ function Base.copyto!(dest::AnyGPUArray{<:Any, N}, destcrange::CartesianIndices{
260
246
len = length (destcrange)
261
247
len == 0 && return dest
262
248
249
+ # linear copy if we can
250
+ if N == 1
251
+ d_offset = first (destcrange)[1 ]
252
+ s_offset = first (srccrange)[1 ]
253
+ return copyto! (dest, d_offset, src, s_offset, len)
254
+ end
255
+
263
256
dest_offsets = first (destcrange) - oneunit (CartesianIndex{N})
264
257
src_offsets = first (srccrange) - oneunit (CartesianIndex{N})
265
258
gpu_call (cartesian_copy_kernel!,
@@ -275,6 +268,15 @@ for (dstTyp, srcTyp) in (AbstractGPUArray=>Array, Array=>AbstractGPUArray)
275
268
if size (dstrange) != size (srcrange)
276
269
throw (ArgumentError (" source and destination must have same size (got $(size (srcrange)) and $(size (dstrange)) )" ))
277
270
end
271
+ len = length (dstrange)
272
+ len == 0 && return dest
273
+
274
+ # linear copy if we can
275
+ if N == 1
276
+ d_offset = first (dstrange)[1 ]
277
+ s_offset = first (srcrange)[1 ]
278
+ return copyto! (dst, d_offset, src, s_offset, len)
279
+ end
278
280
279
281
# figure out how many dimensions of the Cartesian ranges map onto contiguous memory
280
282
# in both source and destination. we will copy these one by one as linear ranges.
0 commit comments