Skip to content

Commit 9eb2770

Browse files
vtjnashSeelengrabMasonProtter
authored
add sizehint! for first and make append!/prepend! safer (#51903)
First we add an optional API parameter for `sizehint!` that controls whether it is for `push!` (default) or `pushfirst!`. Secondly, we make the offset zero when using `sizehint!` to shrink an array from the end, or the trailing size zero when using it to shring from the beginning. Then we replace the prior implementations of `prepend!` and `append!` with ones that are safe even if the iterator changes length during the operation or if convert fails. The result of `prepend!` may be in an undefined order (because of the `reverse!` call) in the presence of concurrent modifications or errors, but at least all of the elements will be present and valid afterwards. Replaces and closes #49905 Replaces and closes #47391 Fixes #15868 Benchmarks show that repeated `push!` performance (with sizehint) is nearly equivalent to the old append performance: ``` julia> @benchmark append!(x, 1:1000) setup=x=Vector{Float64}(undef,0) BenchmarkTools.Trial: 10000 samples with 10 evaluations. Range (min … max): 1.027 μs … 72.871 μs ┊ GC (min … max): 0.00% … 94.57% Time (median): 1.465 μs ┊ GC (median): 0.00% Time (mean ± σ): 1.663 μs ± 1.832 μs ┊ GC (mean ± σ): 6.20% ± 5.67% ▂▃▅▆█▇▇▆▄▂▁ ▂▁▁▂▂▂▂▃▄▅▇█████████████▇▆▅▅▅▅▅▅▄▅▄▅▅▅▆▇███▆▅▄▃▃▂▂▂▂▂▂▂▂▂▂ ▄ 1.03 μs Histogram: frequency by time 2.31 μs < Memory estimate: 19.69 KiB, allocs estimate: 0. julia> @benchmark append!(x, 1:1000) setup=x=Vector{Int}(undef,0) BenchmarkTools.Trial: 10000 samples with 10 evaluations. Range (min … max): 851.900 ns … 76.757 μs ┊ GC (min … max): 0.00% … 91.59% Time (median): 1.181 μs ┊ GC (median): 0.00% Time (mean ± σ): 1.543 μs ± 1.972 μs ┊ GC (mean ± σ): 6.75% ± 5.75% ▆█▇▃ ▂▃██████▇▅▅▄▅▅▃▂▂▂▃▃▃▂▃▃▃▂▂▂▂▂▁▂▁▂▁▂▂▂▁▁▂▂▁▁▁▁▁▁▁▂▂▂▃▃▃▃▂▂▂▂ ▃ 852 ns Histogram: frequency by time 4.07 μs < Memory estimate: 19.69 KiB, allocs estimate: 0. ``` Co-authored-by: Sukera <Seelengrab@users.noreply.github.com> Co-authored-by: MasonProtter <mason.protter@icloud.com>
1 parent 786caaa commit 9eb2770

File tree

6 files changed

+102
-41
lines changed

6 files changed

+102
-41
lines changed

base/array.jl

Lines changed: 47 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1294,11 +1294,11 @@ and [`prepend!`](@ref) and [`pushfirst!`](@ref) for the opposite order.
12941294
"""
12951295
function append! end
12961296

1297-
function append!(a::Vector, items::AbstractVector)
1298-
itemindices = eachindex(items)
1299-
n = length(itemindices)
1297+
function append!(a::Vector{T}, items::Union{AbstractVector{<:T},Tuple}) where T
1298+
items isa Tuple && (items = map(x -> convert(T, x), items))
1299+
n = length(items)
13001300
_growend!(a, n)
1301-
copyto!(a, length(a)-n+1, items, first(itemindices), n)
1301+
copyto!(a, length(a)-n+1, items, firstindex(items), n)
13021302
return a
13031303
end
13041304

@@ -1308,16 +1308,11 @@ push!(a::AbstractVector, iter...) = append!(a, iter)
13081308
append!(a::AbstractVector, iter...) = foldl(append!, iter, init=a)
13091309

13101310
function _append!(a::AbstractVector, ::Union{HasLength,HasShape}, iter)
1311-
@_terminates_locally_meta
1312-
n = length(a)
1311+
n = Int(length(iter))::Int
13131312
i = lastindex(a)
1314-
resize!(a, n+Int(length(iter))::Int)
1315-
for (i, item) in zip(i+1:lastindex(a), iter)
1316-
if isa(a, Vector) # give better effects for builtin vectors
1317-
@_safeindex a[i] = item
1318-
else
1319-
a[i] = item
1320-
end
1313+
sizehint!(a, length(a) + n; shrink=false)
1314+
for item in iter
1315+
push!(a, item)
13211316
end
13221317
a
13231318
end
@@ -1359,15 +1354,13 @@ julia> prepend!([6], [1, 2], [3, 4, 5])
13591354
"""
13601355
function prepend! end
13611356

1362-
function prepend!(a::Vector, items::AbstractVector)
1363-
itemindices = eachindex(items)
1364-
n = length(itemindices)
1357+
function prepend!(a::Vector{T}, items::Union{AbstractVector{<:T},Tuple}) where T
1358+
items isa Tuple && (items = map(x -> convert(T, x), items))
1359+
n = length(items)
13651360
_growbeg!(a, n)
1366-
if a === items
1367-
copyto!(a, 1, items, n+1, n)
1368-
else
1369-
copyto!(a, 1, items, first(itemindices), n)
1370-
end
1361+
# in case of aliasing, the _growbeg might have shifted our data, so copy
1362+
# just the last n elements instead of all of them from the first
1363+
copyto!(a, 1, items, lastindex(items)-n+1, n)
13711364
return a
13721365
end
13731366

@@ -1379,12 +1372,14 @@ prepend!(a::AbstractVector, iter...) = foldr((v, a) -> prepend!(a, v), iter, ini
13791372
function _prepend!(a::Vector, ::Union{HasLength,HasShape}, iter)
13801373
@_terminates_locally_meta
13811374
require_one_based_indexing(a)
1382-
n = length(iter)
1383-
_growbeg!(a, n)
1384-
i = 0
1375+
n = Int(length(iter))::Int
1376+
sizehint!(a, length(a) + n; first=true, shrink=false)
1377+
n = 0
13851378
for item in iter
1386-
@_safeindex a[i += 1] = item
1379+
n += 1
1380+
pushfirst!(a, item)
13871381
end
1382+
reverse!(a, 1, n)
13881383
a
13891384
end
13901385
function _prepend!(a::Vector, ::IteratorSize, iter)
@@ -1441,13 +1436,17 @@ function resize!(a::Vector, nl::Integer)
14411436
end
14421437

14431438
"""
1444-
sizehint!(s, n; shrink::Bool = true) -> s
1439+
sizehint!(s, n; first::Bool=false, shrink::Bool=true) -> s
14451440
14461441
Suggest that collection `s` reserve capacity for at least `n` elements. That is, if
14471442
you expect that you're going to have to push a lot of values onto `s`, you can avoid
14481443
the cost of incremental reallocation by doing it once up front; this can improve
14491444
performance.
14501445
1446+
If `first` is true, then the reserved space is from the start of the collection, for ordered
1447+
collections. Supplying this keyword may result in an error if the collection is nor ordered
1448+
or if `pushfirst!` is not supported for this collection.
1449+
14511450
See also [`resize!`](@ref).
14521451
14531452
# Notes on the performance model
@@ -1466,40 +1465,51 @@ For types that support `sizehint!`,
14661465
4. `shrink` controls if the collection can be shrunk.
14671466
14681467
!!! compat "Julia 1.11"
1469-
The `shrink` argument was added in Julia 1.11.
1468+
The `shrink` and `first` arguments were added in Julia 1.11.
14701469
"""
14711470
function sizehint! end
14721471

1473-
function sizehint!(a::Vector, sz::Integer; shrink::Bool = true)
1472+
function sizehint!(a::Vector, sz::Integer; first::Bool=false, shrink::Bool=true)
14741473
len = length(a)
14751474
ref = a.ref
14761475
mem = ref.mem
14771476
memlen = length(mem)
1478-
offset = memoryrefoffset(ref)
1479-
sz = max(Int(sz), offset + len - 1)
1477+
sz = max(Int(sz), len)
1478+
inc = sz - len
14801479
if sz <= memlen
14811480
# if we don't save at least 1/8th memlen then its not worth it to shrink
14821481
if !shrink || memlen - sz <= div(memlen, 8)
14831482
return a
14841483
end
14851484
newmem = array_new_memory(mem, sz)
1486-
if len == 0
1487-
newref = GenericMemoryRef(newmem)
1485+
if first
1486+
newref = GenericMemoryRef(newmem, inc + 1)
14881487
else
1489-
unsafe_copyto!(newmem, offset, mem, offset, len)
1490-
newref = @inbounds GenericMemoryRef(newmem, offset)
1488+
newref = GenericMemoryRef(newmem)
14911489
end
1490+
unsafe_copyto!(newref, ref, len)
14921491
setfield!(a, :ref, newref)
1493-
else
1494-
inc = sz - len;
1492+
elseif first
1493+
_growbeg!(a, inc)
1494+
newref = getfield(a, :ref)
1495+
newref = GenericMemoryRef(newref, inc + 1)
1496+
setfield!(a, :size, (len,)) # undo the size change from _growbeg!
1497+
setfield!(a, :ref, newref) # undo the offset change from _growbeg!
1498+
else # last
14951499
_growend!(a, inc)
14961500
setfield!(a, :size, (len,)) # undo the size change from _growend!
14971501
end
14981502
a
14991503
end
15001504

15011505
# Fall-back implementation for non-shrinkable collections
1502-
sizehint!(a, sz; shrink::Bool) = sizehint!(a, sz)
1506+
# avoid defining this the normal way to avoid avoid infinite recursion
1507+
function Core.kwcall(kwargs::NamedTuple{names}, ::typeof(sizehint!), a, sz) where names
1508+
get(kwargs, :first, false)::Bool
1509+
get(kwargs, :shrink, true)::Bool
1510+
isempty(diff_names(names, (:first, :shrink))) || kwerr(kwargs, sizehint!, a, sz)
1511+
sizehint!(a, sz)
1512+
end
15031513

15041514
"""
15051515
pop!(collection) -> item

base/bitset.jl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,10 @@ function copy!(dest::BitSet, src::BitSet)
5555
dest
5656
end
5757

58-
sizehint!(s::BitSet, n::Integer) = (sizehint!(s.bits, (n+63) >> 6); s)
58+
function sizehint!(s::BitSet, n::Integer; first::Bool=false, shrink::Bool=true)
59+
sizehint!(s.bits, (n+63) >> 6; first, shrink)
60+
s
61+
end
5962

6063
function _bits_getindex(b::Bits, n::Int, offset::Int)
6164
ci = _div64(n) - offset + 1

base/dict.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ end
228228
return h
229229
end
230230

231-
function sizehint!(d::Dict{T}, newsz; shrink::Bool = true) where T
231+
function sizehint!(d::Dict{T}, newsz; shrink::Bool=true) where T
232232
oldsz = length(d.slots)
233233
# limit new element count to max_values of the key type
234234
newsz = min(max(newsz, length(d)), max_values(T)::Int)
@@ -775,7 +775,7 @@ function map!(f, iter::ValueIterator{<:Dict})
775775
end
776776

777777
function mergewith!(combine, d1::Dict{K, V}, d2::AbstractDict) where {K, V}
778-
haslength(d2) && sizehint!(d1, length(d1) + length(d2), shrink = false)
778+
haslength(d2) && sizehint!(d1, length(d1) + length(d2), shrink=false)
779779
for (k, v) in d2
780780
i, sh = ht_keyindex2_shorthash!(d1, k)
781781
if i > 0

base/set.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ copymutable(s::Set{T}) where {T} = Set{T}(s)
135135
# Set is the default mutable fall-back
136136
copymutable(s::AbstractSet{T}) where {T} = Set{T}(s)
137137

138-
sizehint!(s::Set, newsz; shrink::Bool = true) = (sizehint!(s.dict, newsz, shrink = shrink); s)
138+
sizehint!(s::Set, newsz; shrink::Bool=true) = (sizehint!(s.dict, newsz; shrink); s)
139139
empty!(s::Set) = (empty!(s.dict); s)
140140
rehash!(s::Set) = (rehash!(s.dict); s)
141141

test/abstractarray.jl

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1354,6 +1354,28 @@ Base.pushfirst!(tpa::TestPushArray{T}, a::T) where T = pushfirst!(tpa.data, a)
13541354
@test tpa.data == reverse(collect(1:6))
13551355
end
13561356

1357+
mutable struct SimpleArray{T} <: AbstractVector{T}
1358+
els::Vector{T}
1359+
end
1360+
Base.size(sa::SimpleArray) = size(sa.els)
1361+
Base.getindex(sa::SimpleArray, idx...) = getindex(sa.els, idx...)
1362+
Base.setindex!(sa::SimpleArray, v, idx...) = setindex!(sa.els, v, idx...)
1363+
Base.resize!(sa::SimpleArray, n) = resize!(sa.els, n)
1364+
Base.copy(sa::SimpleArray) = SimpleArray(copy(sa.els))
1365+
1366+
isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
1367+
using .Main.OffsetArrays
1368+
1369+
@testset "Failing `$f` should not grow the array $a" for
1370+
f in (push!, append!, pushfirst!, prepend!),
1371+
a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1))
1372+
for args in ((1,), (1,2), ([1], [2]), [1])
1373+
orig = copy(a)
1374+
@test_throws Exception f(a, args...)
1375+
@test a == orig
1376+
end
1377+
end
1378+
13571379
@testset "splatting into hvcat" begin
13581380
t = (1, 2)
13591381
@test [t...; 3 4] == [1 2; 3 4]

test/arrayops.jl

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1786,6 +1786,32 @@ end
17861786
# offset array
17871787
@test append!([1,2], OffsetArray([9,8], (-3,))) == [1,2,9,8]
17881788
@test prepend!([1,2], OffsetArray([9,8], (-3,))) == [9,8,1,2]
1789+
1790+
# Error recovery
1791+
A = [1, 2]
1792+
@test_throws MethodError append!(A, [1, 2, "hi"])
1793+
@test A == [1, 2, 1, 2]
1794+
1795+
oA = OffsetVector(A, 0:3)
1796+
@test_throws InexactError append!(oA, [1, 2, 3.01])
1797+
@test oA == OffsetVector([1, 2, 1, 2, 1, 2], 0:5)
1798+
1799+
@test_throws InexactError append!(A, (x for x in [1, 2, 3.1]))
1800+
@test A == [1, 2, 1, 2, 1, 2, 1, 2]
1801+
1802+
@test_throws InexactError append!(A, (x for x in [1, 2, 3.1] if isfinite(x)))
1803+
@test A == [1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
1804+
1805+
@test_throws MethodError prepend!(A, [1, 2, "hi"])
1806+
@test A == [2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2]
1807+
1808+
A = [1, 2]
1809+
@test_throws InexactError prepend!(A, (x for x in [1, 2, 3.1]))
1810+
@test A == [2, 1, 1, 2]
1811+
1812+
A = [1, 2]
1813+
@test_throws InexactError prepend!(A, (x for x in [1, 2, 3.1] if isfinite(x)))
1814+
@test A == [2, 1, 1, 2]
17891815
end
17901816

17911817
let A = [1,2]

0 commit comments

Comments
 (0)