From c10ca5dcf42d09eb3ee350e61d073a85a97577c4 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Tue, 8 Jul 2025 12:17:31 +0200 Subject: [PATCH 1/4] Add fast method for count/sum of view of BitArray The dispatch to this method is not ideal, because it does not handle multi-dimensional views, nor discontiguous views. Ideally, we would have an internal API that, given a view, would iterate all contiguous sub-views. Then this new method could be called for all the contiguous chunks. --- base/bitarray.jl | 31 +++++++++++++++++++++++++++++++ test/bitarray.jl | 5 +++++ 2 files changed, 36 insertions(+) diff --git a/base/bitarray.jl b/base/bitarray.jl index 5a3469fa7c7a2..e7341171a989c 100644 --- a/base/bitarray.jl +++ b/base/bitarray.jl @@ -1451,6 +1451,37 @@ function bitcount(Bc::Vector{UInt64}; init::T=0) where {T} return n end +function _count( + ::typeof(identity), + v::SubArray{Bool, N, <:BitArray, <:Tuple{Union{Integer, AbstractUnitRange}}, true}, + ::Colon, + init::T + ) where {N, T} + pi = only(parentindices(v)) + (fst, lst) = (first(pi), last(pi)) + fst > lst && return init + chunks = parent(v).chunks + + # Mask away the bits in the chunks not inside the view + mask1 = typemax(UInt64) << ((fst - 1) & 63) + mask2 = typemax(UInt64) >> ((64 - lst) & 63) + start_index = ((fst - 1) >>> 6) + 1 + stop_index = ((lst - 1) >>> 6) + 1 + # If the whole view is contained in one chunk, then mask it from both sides + if start_index == stop_index + return (init + count_ones(@inbounds chunks[start_index] & mask1 & mask2)) % T + end + # Else, mask first and last chunk individually, then add all whole chunks + # in a separate loop below. 
+ n = init + count_ones(@inbounds chunks[start_index] & mask1) + n += count_ones(@inbounds chunks[stop_index] & mask2) + for i in (start_index + 1):(stop_index - 1) + n += count_ones(@inbounds chunks[i]) + end + return n % T +end + + _count(::typeof(identity), B::BitArray, ::Colon, init) = bitcount(B.chunks; init) function unsafe_bitfindnext(Bc::Vector{UInt64}, start::Int) diff --git a/test/bitarray.jl b/test/bitarray.jl index fd5c1421a256f..7e20dc4254bb4 100644 --- a/test/bitarray.jl +++ b/test/bitarray.jl @@ -1303,6 +1303,11 @@ timesofar("datamove") @test count(trues(2, 2), init=0x03) === 0x07 @test count(trues(2, 2, 2), dims=2) == fill(2, 2, 1, 2) + + m = bitrand(25, 25) + for idx in Any[0x03, 5, 21:42, 7:6, :, 10:407, 64:70, 65:127, 315:384, Base.OneTo(111)] + @test count(m[idx]) == count(view(m, idx)) + end end timesofar("find") From 64b4ff322081552d85d0fc34f194af1752193148 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Tue, 8 Jul 2025 15:32:50 +0200 Subject: [PATCH 2/4] Bootstrap: Move new method --- base/bitarray.jl | 31 ------------------------------- base/subarray.jl | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/base/bitarray.jl b/base/bitarray.jl index e7341171a989c..5a3469fa7c7a2 100644 --- a/base/bitarray.jl +++ b/base/bitarray.jl @@ -1451,37 +1451,6 @@ function bitcount(Bc::Vector{UInt64}; init::T=0) where {T} return n end -function _count( - ::typeof(identity), - v::SubArray{Bool, N, <:BitArray, <:Tuple{Union{Integer, AbstractUnitRange}}, true}, - ::Colon, - init::T - ) where {N, T} - pi = only(parentindices(v)) - (fst, lst) = (first(pi), last(pi)) - fst > lst && return init - chunks = parent(v).chunks - - # Mask away the bits in the chunks not inside the view - mask1 = typemax(UInt64) << ((fst - 1) & 63) - mask2 = typemax(UInt64) >> ((64 - lst) & 63) - start_index = ((fst - 1) >>> 6) + 1 - stop_index = ((lst - 1) >>> 6) + 1 - # If the whole view is contained in one chunk, then mask it 
from both sides - if start_index == stop_index - return (init + count_ones(@inbounds chunks[start_index] & mask1 & mask2)) % T - end - # Else, mask first and last chunk individually, then add all whole chunks - # in a separate loop below. - n = init + count_ones(@inbounds chunks[start_index] & mask1) - n += count_ones(@inbounds chunks[stop_index] & mask2) - for i in (start_index + 1):(stop_index - 1) - n += count_ones(@inbounds chunks[i]) - end - return n % T -end - - _count(::typeof(identity), B::BitArray, ::Colon, init) = bitcount(B.chunks; init) function unsafe_bitfindnext(Bc::Vector{UInt64}, start::Int) diff --git a/base/subarray.jl b/base/subarray.jl index 3a0be7d82b981..7ecd1224b91b3 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -543,3 +543,35 @@ end # XXX: this is considerably more unsafe than the other similarly named methods unsafe_wrap(::Type{Vector{UInt8}}, s::FastContiguousSubArray{UInt8,1,Vector{UInt8}}) = unsafe_wrap(Vector{UInt8}, pointer(s), size(s)) + +# This function is placed here because bitarray.jl is run in bootstrap before SubArray +# is defined. +function _count( + ::typeof(identity), + v::SubArray{Bool, N, <:BitArray, <:Tuple{Union{Integer, AbstractUnitRange}}, true}, + ::Colon, + init::T + ) where {N, T} + pi = only(parentindices(v)) + (fst, lst) = (first(pi), last(pi)) + fst > lst && return init + chunks = parent(v).chunks + + # Mask away the bits in the chunks not inside the view + mask1 = typemax(UInt64) << ((fst - 1) & 63) + mask2 = typemax(UInt64) >> ((64 - lst) & 63) + start_index = ((fst - 1) >>> 6) + 1 + stop_index = ((lst - 1) >>> 6) + 1 + # If the whole view is contained in one chunk, then mask it from both sides + if start_index == stop_index + return (init + count_ones(@inbounds chunks[start_index] & mask1 & mask2)) % T + end + # Else, mask first and last chunk individually, then add all whole chunks + # in a separate loop below. 
+ n = init + count_ones(@inbounds chunks[start_index] & mask1) + n += count_ones(@inbounds chunks[stop_index] & mask2) + for i in (start_index + 1):(stop_index - 1) + n += count_ones(@inbounds chunks[i]) + end + return n % T +end From e9bae1b846af05e9c401d54b5f7fabca4d04a381 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Tue, 8 Jul 2025 16:50:53 +0200 Subject: [PATCH 3/4] Use existing helper functions --- base/subarray.jl | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/base/subarray.jl b/base/subarray.jl index 7ecd1224b91b3..c57047ee427d2 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -550,26 +550,31 @@ function _count( ::typeof(identity), v::SubArray{Bool, N, <:BitArray, <:Tuple{Union{Integer, AbstractUnitRange}}, true}, ::Colon, - init::T - ) where {N, T} + init::Integer + ) where N + T = typeof(init) pi = only(parentindices(v)) - (fst, lst) = (first(pi), last(pi)) + (fst, lst) = (Int(first(pi))::Int, Int(last(pi))::Int) fst > lst && return init chunks = parent(v).chunks # Mask away the bits in the chunks not inside the view - mask1 = typemax(UInt64) << ((fst - 1) & 63) - mask2 = typemax(UInt64) >> ((64 - lst) & 63) - start_index = ((fst - 1) >>> 6) + 1 - stop_index = ((lst - 1) >>> 6) + 1 + (start_index, start_nmask) = get_chunks_id(fst) + (stop_index, stop_nmask) = get_chunks_id(lst) + + mask_start = typemax(UInt64) << ((fst - 1) & 63) + mask_end = _msk_end(lst) + start_index = _div64(fst - 1) + 1 + stop_index = _div64(lst - 1) + 1 # If the whole view is contained in one chunk, then mask it from both sides if start_index == stop_index - return (init + count_ones(@inbounds chunks[start_index] & mask1 & mask2)) % T + in_chunk = count_ones(@inbounds chunks[start_index] & mask_start & mask_end) + return (init + in_chunk) % T end # Else, mask first and last chunk individually, then add all whole chunks # in a separate loop below. 
- n = init + count_ones(@inbounds chunks[start_index] & mask1) - n += count_ones(@inbounds chunks[stop_index] & mask2) + n = init + count_ones(@inbounds chunks[start_index] & mask_start) + n += count_ones(@inbounds chunks[stop_index] & mask_end) for i in (start_index + 1):(stop_index - 1) n += count_ones(@inbounds chunks[i]) end From 2f065fbfd4372185fca74a070805309efb789fec Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Thu, 10 Jul 2025 20:49:47 +0200 Subject: [PATCH 4/4] Broaden bitview type and clean up code a little --- base/subarray.jl | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/base/subarray.jl b/base/subarray.jl index c57047ee427d2..5d01340711c69 100644 --- a/base/subarray.jl +++ b/base/subarray.jl @@ -544,14 +544,22 @@ end # XXX: this is considerably more unsafe than the other similarly named methods unsafe_wrap(::Type{Vector{UInt8}}, s::FastContiguousSubArray{UInt8,1,Vector{UInt8}}) = unsafe_wrap(Vector{UInt8}, pointer(s), size(s)) +const ContiguousBitView = SubArray{ + Bool, + N, + <:BitArray, + <:Union{Tuple{Vararg{Real}}, Tuple{AbstractUnitRange, Vararg{Any}}}, + true +} where N + # This function is placed here because bitarray.jl is run in bootstrap before SubArray # is defined. function _count( ::typeof(identity), - v::SubArray{Bool, N, <:BitArray, <:Tuple{Union{Integer, AbstractUnitRange}}, true}, + v::ContiguousBitView, ::Colon, init::Integer - ) where N + ) T = typeof(init) pi = only(parentindices(v)) (fst, lst) = (Int(first(pi))::Int, Int(last(pi))::Int) @@ -559,11 +567,9 @@ function _count( chunks = parent(v).chunks # Mask away the bits in the chunks not inside the view - (start_index, start_nmask) = get_chunks_id(fst) - (stop_index, stop_nmask) = get_chunks_id(lst) - mask_start = typemax(UInt64) << ((fst - 1) & 63) mask_end = _msk_end(lst) + start_index = _div64(fst - 1) + 1 stop_index = _div64(lst - 1) + 1 # If the whole view is contained in one chunk, then mask it from both sides