@@ -134,17 +134,15 @@ Base.collect(X::AnyGPUArray) = collect_to_cpu(X)
134
134
135
135
# memory copying
136
136
137
+ # expects the GPU array type to have linear `copyto!` methods (i.e. accepting an integer
138
+ # offset and length) from and to CPU arrays and between GPU arrays.
139
+
137
140
# Copy `src` into `dst`, insisting that both vectors have exactly the same axes.
# Throws an `ArgumentError` when the axes differ; otherwise the work is delegated to
# the two-argument `copyto!(dst, src)` and its result is returned.
function Base.copy!(dst::AbstractGPUVector, src::AbstractGPUVector)
    if axes(dst) != axes(src)
        throw(ArgumentError(
            " arrays must have the same axes for `copy!`. consider using `copyto!` instead"))
    end
    return copyto!(dst, src)
end
142
145
143
- # # basic linear copies of identically-typed memory
144
-
145
- # expects the GPU array type to have linear `copyto!` methods (i.e. accepting an integer
146
- # offset and length) from and to CPU arrays and between GPU arrays.
147
-
148
146
for (D, S) in ((AnyGPUArray, Array),
149
147
(Array, AnyGPUArray),
150
148
(AnyGPUArray, AnyGPUArray))
@@ -156,18 +154,6 @@ for (D, S) in ((AnyGPUArray, Array),
156
154
copyto! (dest, drange, src, srange)
157
155
end
158
156
159
- function Base. copyto! (dest:: $D , d_range:: CartesianIndices{1} ,
160
- src:: $S , s_range:: CartesianIndices{1} )
161
- len = length (d_range)
162
- if length (s_range) != len
163
- throw (ArgumentError (" Copy range needs same length. Found: dest: $len , src: $(length (s_range)) " ))
164
- end
165
- len == 0 && return dest
166
- d_offset = first (d_range)[1 ]
167
- s_offset = first (s_range)[1 ]
168
- copyto! (dest, d_offset, src, s_offset, len)
169
- end
170
-
171
157
# whole-array copy: forward to the linear (offset, length) method, starting at the
# first element of both arrays and covering every element of `src`.
function Base.copyto!(dest::$D, src::$S)
    return copyto!(dest, 1, src, 1, length(src))
end
172
158
end
173
159
end
@@ -253,6 +239,13 @@ function Base.copyto!(dest::AnyGPUArray{<:Any, N}, destcrange::CartesianIndices{
253
239
len = length (destcrange)
254
240
len == 0 && return dest
255
241
242
+ # linear copy if we can
243
+ if N == 1
244
+ d_offset = first (destcrange)[1 ]
245
+ s_offset = first (srccrange)[1 ]
246
+ return copyto! (dest, d_offset, src, s_offset, len)
247
+ end
248
+
256
249
dest_offsets = first (destcrange) - oneunit (CartesianIndex{N})
257
250
src_offsets = first (srccrange) - oneunit (CartesianIndex{N})
258
251
kernel = cartesian_copy_kernel! (get_backend (dest))
@@ -267,6 +260,15 @@ for (dstTyp, srcTyp) in (AbstractGPUArray=>Array, Array=>AbstractGPUArray)
267
260
if size (dstrange) != size (srcrange)
268
261
throw (ArgumentError (" source and destination must have same size (got $(size (srcrange)) and $(size (dstrange)) )" ))
269
262
end
263
+ len = length (dstrange)
264
+ len == 0 && return dest
265
+
266
+ # linear copy if we can
267
+ if N == 1
268
+ d_offset = first (dstrange)[1 ]
269
+ s_offset = first (srcrange)[1 ]
270
+ return copyto! (dst, d_offset, src, s_offset, len)
271
+ end
270
272
271
273
# figure out how many dimensions of the Cartesian ranges map onto contiguous memory
272
274
# in both source and destination. we will copy these one by one as linear ranges.
0 commit comments