From a2adce0049e030ca29c62375f0d2845de89e37f1 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Tue, 31 Mar 2020 11:12:47 +0200 Subject: [PATCH 1/3] Fix minimum/maximum over dimensions with missing values for `BigInt` Types such as `BigInt` don't support `typemin` and `typemax` so the current method to find an initial value different from `missing` fails. Use the largest/smallest non-missing value to initialize the array instead. This is an inefficient approach. Faster alternatives would be to avoid using an initial value at all, and instead keep track of whether a value has been set in a separate mask; or to use `typemax`/`typemin` for types that support them. --- base/reducedim.jl | 11 +++++++---- test/reduce.jl | 10 ++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/base/reducedim.jl b/base/reducedim.jl index f5a22940310cf..4c63ce8424faf 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -146,15 +146,18 @@ for (f1, f2, initval, typeextreme) in ((:min, :max, :Inf, :typemax), (:max, :min T = _realtype(f, promote_union(eltype(A))) Tr = v0 isa T ? T : typeof(v0) - # but NaNs and missing need to be avoided as initial values + # but NaNs, missing and unordered values need to be avoided as initial values if v0 isa Number && isnan(v0) # v0 is NaN v0 = oftype(v0, $initval) - elseif isunordered(v0) - # v0 is missing or a third-party unordered value + elseif ismissing(v0) + if !all(ismissing, A) + v0 = mapreduce(f, $f2, skipmissing(A)) + end + elseif isunordered(v0) # v0 is a third-party unordered value Tnm = nonmissingtype(Tr) # TODO: Some types, like BigInt, don't support typemin/typemax. - # So a Matrix{Union{BigInt, Missing}} can still error here. + # So a Matrix{Union{BigInt, T}} can still error here. v0 = $typeextreme(Tnm) end # v0 may have changed type. diff --git a/test/reduce.jl b/test/reduce.jl index 2c852084de37e..63da25bd8576e 100644 --- a/test/reduce.jl +++ b/test/reduce.jl @@ -377,6 +377,16 @@ end @test maximum(Vector(Int16(1):Int16(100))) === Int16(100) @test maximum(Int32[1,2]) === Int32(2) +@testset "minimum/maximum over dims with missing (#35308)" begin + for T in (Int, Float64, BigInt, BigFloat) + x = Union{T, Missing}[1 missing; 2 missing] + @test isequal(minimum(x, dims=1), reshape([1, missing], 1, :)) + @test isequal(maximum(x, dims=1), reshape([2, missing], 1, :)) + @test isequal(minimum(x, dims=2), reshape([missing, missing], :, 1)) + @test isequal(maximum(x, dims=2), reshape([missing, missing], :, 1)) + end +end + A = circshift(reshape(1:24,2,3,4), (0,1,1)) @test extrema(A,dims=1) == reshape([(23,24),(19,20),(21,22),(5,6),(1,2),(3,4),(11,12),(7,8),(9,10),(17,18),(13,14),(15,16)],1,3,4) @test extrema(A,dims=2) == reshape([(19,23),(20,24),(1,5),(2,6),(7,11),(8,12),(13,17),(14,18)],2,1,4) From dbb6f491c0863a62d6e2db14a2a4d2fa74690d88 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Wed, 15 Nov 2023 11:12:18 +0100 Subject: [PATCH 2/3] Better fix --- base/reducedim.jl | 19 ++++++------------- test/reduce.jl | 10 ---------- test/reducedim.jl | 15 +++++++++++---- 3 files changed, 17 insertions(+), 27 deletions(-) diff --git a/base/reducedim.jl b/base/reducedim.jl index 4c63ce8424faf..6ce8acc3fcd89 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -150,15 +150,9 @@ for (f1, f2, initval, typeextreme) in ((:min, :max, :Inf, :typemax), (:max, :min if v0 isa Number && isnan(v0) # v0 is NaN v0 = oftype(v0, $initval) - elseif ismissing(v0) - if !all(ismissing, A) - v0 = mapreduce(f, $f2, skipmissing(A)) - end - elseif isunordered(v0) # v0 is a third-party unordered value - Tnm = nonmissingtype(Tr) - # TODO: Some types, like BigInt, don't support typemin/typemax. - # So a Matrix{Union{BigInt, T}} can still error here. - v0 = $typeextreme(Tnm) + elseif isunordered(v0) && !all(isunordered, A1) + # v0 is missing or a third-party unordered value + v0 = mapreduce(f, $f2, Iterators.filter(!isunordered, A1)) end # v0 may have changed type. Tr = v0 isa T ? T : typeof(v0) @@ -189,12 +183,11 @@ function reducedim_init(f::ExtremaMap, op::typeof(_extrema_rf), A::AbstractArray # but NaNs and missing need to be avoided as initial values if v0[1] isa Number && isnan(v0[1]) + # v0 is NaN v0 = oftype(v0[1], Inf), oftype(v0[2], -Inf) - elseif isunordered(v0[1]) + elseif isunordered(v0[1]) && !all(isunordered, A1) # v0 is missing or a third-party unordered value - # TODO: Some types, like BigInt, don't support typemin/typemax. - # So a Matrix{Union{BigInt, Missing}} can still error here. - v0 = typemax(nonmissingtype(Tmin)), typemin(nonmissingtype(Tmax)) + v0 = reverse(mapreduce(f, op, Iterators.filter(!isunordered, A1))) end # v0 may have changed type. Tmin = v0[1] isa T ? T : typeof(v0[1]) diff --git a/test/reduce.jl b/test/reduce.jl index 63da25bd8576e..2c852084de37e 100644 --- a/test/reduce.jl +++ b/test/reduce.jl @@ -377,16 +377,6 @@ end @test maximum(Vector(Int16(1):Int16(100))) === Int16(100) @test maximum(Int32[1,2]) === Int32(2) -@testset "minimum/maximum over dims with missing (#35308)" begin - for T in (Int, Float64, BigInt, BigFloat) - x = Union{T, Missing}[1 missing; 2 missing] - @test isequal(minimum(x, dims=1), reshape([1, missing], 1, :)) - @test isequal(maximum(x, dims=1), reshape([2, missing], 1, :)) - @test isequal(minimum(x, dims=2), reshape([missing, missing], :, 1)) - @test isequal(maximum(x, dims=2), reshape([missing, missing], :, 1)) - end -end - A = circshift(reshape(1:24,2,3,4), (0,1,1)) @test extrema(A,dims=1) == reshape([(23,24),(19,20),(21,22),(5,6),(1,2),(3,4),(11,12),(7,8),(9,10),(17,18),(13,14),(15,16)],1,3,4) @test extrema(A,dims=2) == reshape([(19,23),(20,24),(1,5),(2,6),(7,11),(8,12),(13,17),(14,18)],2,1,4) diff --git a/test/reducedim.jl b/test/reducedim.jl index daa0a3fbe1f92..f4767dd7a472c 100644 --- a/test/reducedim.jl +++ b/test/reducedim.jl @@ -608,7 +608,7 @@ end end @testset "NaN/missing test for extrema with dims #43599" begin for sz = (3, 10, 100) - for T in (Int, Float64, BigFloat) + for T in (Int, Float64, BigFloat, BigInt) Aₘ = Matrix{Union{T, Missing}}(rand(-sz:sz, sz, sz)) Aₘ[rand(1:sz*sz, sz)] .= missing unordered_test_for_extrema(Aₘ) @@ -622,9 +622,16 @@ end end end end -@test_broken minimum([missing;BigInt(1)], dims = 1) -@test_broken maximum([missing;BigInt(1)], dims = 1) -@test_broken extrema([missing;BigInt(1)], dims = 1) + +@testset "minimum/maximum over dims with missing (#35308)" begin + for T in (Int, Float64, BigInt, BigFloat) + x = Union{T, Missing}[1 missing; 2 missing] + @test isequal(minimum(x, dims=1), reshape([1, missing], 1, :)) + @test isequal(maximum(x, dims=1), reshape([2, missing], 1, :)) + @test isequal(minimum(x, dims=2), reshape([missing, missing], :, 1)) + @test isequal(maximum(x, dims=2), reshape([missing, missing], :, 1)) + end +end # issue #26709 @testset "dimensional reduce with custom non-bitstype types" begin From b93a94261d0965704986bc1e8850d71077da4f25 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sat, 18 Nov 2023 15:26:05 +0100 Subject: [PATCH 3/3] Add fast path for types that support `typemin`/`typemax` --- base/reducedim.jl | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/base/reducedim.jl b/base/reducedim.jl index 6ce8acc3fcd89..21dff3b46ab37 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -150,9 +150,14 @@ for (f1, f2, initval, typeextreme) in ((:min, :max, :Inf, :typemax), (:max, :min if v0 isa Number && isnan(v0) # v0 is NaN v0 = oftype(v0, $initval) - elseif isunordered(v0) && !all(isunordered, A1) + elseif isunordered(v0) # v0 is missing or a third-party unordered value - v0 = mapreduce(f, $f2, Iterators.filter(!isunordered, A1)) + Tnm = nonmissingtype(Tr) + if Tnm <: Union{BitInteger, IEEEFloat, BigFloat} + v0 = $typeextreme(Tnm) + elseif !all(isunordered, A1) + v0 = mapreduce(f, $f2, Iterators.filter(!isunordered, A1)) + end end # v0 may have changed type. Tr = v0 isa T ? T : typeof(v0) @@ -185,9 +190,16 @@ function reducedim_init(f::ExtremaMap, op::typeof(_extrema_rf), A::AbstractArray if v0[1] isa Number && isnan(v0[1]) # v0 is NaN v0 = oftype(v0[1], Inf), oftype(v0[2], -Inf) - elseif isunordered(v0[1]) && !all(isunordered, A1) + elseif isunordered(v0[1]) # v0 is missing or a third-party unordered value - v0 = reverse(mapreduce(f, op, Iterators.filter(!isunordered, A1))) + Tminnm = nonmissingtype(Tmin) + Tmaxnm = nonmissingtype(Tmax) + if Tminnm <: Union{BitInteger, IEEEFloat, BigFloat} && + Tmaxnm <: Union{BitInteger, IEEEFloat, BigFloat} + v0 = (typemax(Tminnm), typemin(Tmaxnm)) + elseif !all(isunordered, A1) + v0 = reverse(mapreduce(f, op, Iterators.filter(!isunordered, A1))) + end end # v0 may have changed type. Tmin = v0[1] isa T ? T : typeof(v0[1])