Skip to content

Commit 82841b0

Browse files
committed
redo sinpi and cospi kernel polynomial approximation
* Make the `Float16` and `Float32` variants use the same evaluation scheme already used for `Float64`, instead of computing in a wider arithmetic and then rounding back to the narrower arithmetic. This of course results in some loss of accuracy, however it seems more appropriate not to rely on other arithmetics, I guess. Even though the accuracy is worse, the result still seems experimentally to be faithful for each argument from the domain of the kernel in each case except for `sinpi(::Float16)` which has max error of about 1.4 ULP. * Regenerate the polynomials for `Float64`, too, improving accuracy. The degrees of the polynomials stay the same. Closes JuliaLang#58977
1 parent e4755de commit 82841b0

File tree

1 file changed

+244
-20
lines changed

1 file changed

+244
-20
lines changed

base/special/trig.jl

Lines changed: 244 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -727,18 +727,13 @@ end
727727

728728
# Uses minimax polynomial of sin(π * x) for π * x in [0, .25]
729729
@inline function sinpi_kernel(x::Float64)
730-
sinpi_kernel_wide(x)
730+
_sinpi_kernel_polynomial_f64(x)
731731
end
732732
@inline function sinpi_kernel_wide(x::Float64)
733-
= x*x
734-
x⁴ =*
735-
r = evalpoly(x², (2.5501640398773415, -0.5992645293202981, 0.08214588658006512,
736-
-7.370429884921779e-3, 4.662827319453555e-4, -2.1717412523382308e-5))
737-
return muladd(3.141592653589793, x, x*muladd(-5.16771278004997,
738-
x², muladd(x⁴, r, 1.2245907532225998e-16)))
733+
_sinpi_kernel_polynomial_f64(x)
739734
end
740735
@inline function sinpi_kernel(x::Float32)
741-
Float32(sinpi_kernel_wide(x))
736+
_sinpi_kernel_polynomial_f32(x)
742737
end
743738
@inline function sinpi_kernel_wide(x::Float32)
744739
x = Float64(x)
@@ -747,7 +742,7 @@ end
747742
end
748743

749744
@inline function sinpi_kernel(x::Float16)
750-
Float16(sinpi_kernel_wide(x))
745+
_sinpi_kernel_polynomial_f16(x)
751746
end
752747
@inline function sinpi_kernel_wide(x::Float16)
753748
x = Float32(x)
@@ -756,34 +751,263 @@ end
756751

757752
# Uses minimax polynomial of cos(π * x) for π * x in [0, .25]
758753
@inline function cospi_kernel(x::Float64)
759-
cospi_kernel_wide(x)
754+
_cospi_kernel_polynomial_f64(x)
760755
end
761756
@inline function cospi_kernel_wide(x::Float64)
762-
= x*x
763-
r =*evalpoly(x², (4.058712126416765, -1.3352627688537357, 0.23533063027900392,
764-
-0.025806887811869204, 1.9294917136379183e-3, -1.0368935675474665e-4))
765-
a_x² = 4.934802200544679 *
766-
a_x²lo = muladd(3.109686485461973e-16, x², muladd(4.934802200544679, x², -a_x²))
767-
768-
w = 1.0-a_x²
769-
return w + muladd(x², r, ((1.0-w)-a_x²) - a_x²lo)
757+
_cospi_kernel_polynomial_f64(x)
770758
end
771759
@inline function cospi_kernel(x::Float32)
772-
Float32(cospi_kernel_wide(x))
760+
_cospi_kernel_polynomial_f32(x)
773761
end
774762
@inline function cospi_kernel_wide(x::Float32)
775763
x = Float64(x)
776764
return evalpoly(x*x, (1.0, -4.934802200541122, 4.058712123568637,
777765
-1.3352624040152927, 0.23531426791507182, -0.02550710082498761))
778766
end
779767
@inline function cospi_kernel(x::Float16)
780-
Float16(cospi_kernel_wide(x))
768+
_cospi_kernel_polynomial_f16(x)
781769
end
782770
@inline function cospi_kernel_wide(x::Float16)
783771
x = Float32(x)
784772
return evalpoly(x*x, (1.0f0, -4.934802f0, 4.058712f0, -1.3352624f0, 0.23531426f0, -0.0255071f0))
785773
end
786774

775+
struct CosPiEvaluationScheme{T <: AbstractFloat, Rest <: Tuple{T, Vararg{T}}}
776+
"""
777+
Coefficient 0 of the polynomial.
778+
"""
779+
c₀::T
780+
"""
781+
Coefficient 2 of the polynomial, in double-word format (unevaluated sum of two floating-point numbers), stored as `(high, low)`.
782+
"""
783+
c₂::NTuple{2, T}
784+
"""
785+
Rest of the coefficients.
786+
"""
787+
rest::Rest
788+
function CosPiEvaluationScheme(;
789+
c₀::AbstractFloat,
790+
c₂::(NTuple{2, T} where {T <: AbstractFloat}),
791+
rest::(Tuple{T, Vararg{T}} where {T <: AbstractFloat}),
792+
)
793+
new{eltype(rest), typeof(rest)}(c₀, c₂, rest)
794+
end
795+
end
796+
797+
struct SinPiEvaluationScheme{T <: AbstractFloat, Rest <: Tuple{T, Vararg{T}}}
798+
"""
799+
Coefficient 1 of the polynomial, in double-word format (unevaluated sum of two floating-point numbers), stored as `(high, low)`.
800+
"""
801+
c₁::NTuple{2, T}
802+
"""
803+
Rest of the coefficients.
804+
"""
805+
rest::Rest
806+
function SinPiEvaluationScheme(;
807+
c₁::(NTuple{2, T} where {T <: AbstractFloat}),
808+
rest::(Tuple{T, Vararg{T}} where {T <: AbstractFloat}),
809+
)
810+
new{eltype(rest), typeof(rest)}(c₁, rest)
811+
end
812+
end
813+
814+
function (sch::CosPiEvaluationScheme)(x::AbstractFloat)
815+
= x * x
816+
c₀ = sch.c₀
817+
(c₂, c₂_lo) = sch.c₂
818+
rest = sch.rest
819+
c₂ = -c₂
820+
c₂_lo = -c₂_lo
821+
r = evalpoly(x², rest) *
822+
a_x² = c₂ *
823+
a_x²_lo = muladd(c₂_lo, x², muladd(c₂, x², -a_x²))
824+
w = c₀ - a_x²
825+
w + muladd(x², r, ((c₀ - w) - a_x²) - a_x²_lo)
826+
end
827+
828+
function (sch::SinPiEvaluationScheme)(x::AbstractFloat)
829+
= x * x
830+
(c₁, c₁_lo) = sch.c₁
831+
(c₃, rest...) = sch.rest
832+
if rest === ()
833+
r = typeof(x)(0)
834+
else
835+
r = evalpoly(x², rest)
836+
end
837+
muladd(c₁, x, x * muladd(c₃, x², muladd(x² * x², r, c₁_lo)))
838+
end
839+
840+
/*
841+
# Polynomial approximation for `cospi` and `sinpi` kernels
842+
843+
## `cospi`
844+
845+
Constrain the zeroth coefficient to `1` to achieve exact behavior for zero input.
846+
847+
* `Float16`:
848+
849+
```sollya
850+
handTuned = 1;
851+
prec = 500!;
852+
accurate = cos(pi * x);
853+
kernelDomain = [-2^-3, 2^-2];
854+
constrainedPart = 1;
855+
machinePrecision = 11;
856+
doubleWordPrecision = 2 * machinePrecision + handTuned;
857+
freeMonomials = [|2, 4|];
858+
freeMonomialPrecisions = [|doubleWordPrecision, machinePrecision|];
859+
polynomial = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, kernelDomain, constrainedPart);
860+
supnormPrecision = 2^-10;
861+
sup(supnorm(polynomial, accurate, kernelDomain, relative, supnormPrecision));
862+
polynomial;
863+
```
864+
865+
* `Float32`:
866+
867+
```sollya
868+
handTuned = 3;
869+
prec = 500!;
870+
accurate = cos(pi * x);
871+
kernelDomain = [-2^-3, 2^-2];
872+
constrainedPart = 1;
873+
machinePrecision = 24;
874+
doubleWordPrecision = 2 * machinePrecision + handTuned;
875+
freeMonomials = [|2, 4, 6, 8|];
876+
freeMonomialPrecisions = [|doubleWordPrecision, machinePrecision, machinePrecision, machinePrecision|];
877+
polynomial = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, kernelDomain, constrainedPart);
878+
supnormPrecision = 2^-10;
879+
sup(supnorm(polynomial, accurate, kernelDomain, relative, supnormPrecision));
880+
polynomial;
881+
```
882+
883+
* `Float64`:
884+
885+
```sollya
886+
handTuned = 1;
887+
prec = 500!;
888+
accurate = cos(pi * x);
889+
kernelDomain = [-2^-3, 2^-2];
890+
constrainedPart = 1;
891+
machinePrecision = 53;
892+
doubleWordPrecision = 2 * machinePrecision + handTuned;
893+
freeMonomials = [|2, 4, 6, 8, 10, 12, 14|];
894+
freeMonomialPrecisions = [|doubleWordPrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision|];
895+
polynomial = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, kernelDomain, constrainedPart);
896+
supnormPrecision = 2^-10;
897+
sup(supnorm(polynomial, accurate, kernelDomain, relative, supnormPrecision));
898+
polynomial;
899+
```
900+
901+
## `sinpi`
902+
903+
* `Float16`:
904+
905+
```sollya
906+
handTuned = 3;
907+
prec = 500!;
908+
accurate = sin(pi * x);
909+
kernelDomain = [-2^-3, 2^-2];
910+
machinePrecision = 11;
911+
doubleWordPrecision = 2 * machinePrecision + handTuned;
912+
freeMonomials = [|1, 3, 5|];
913+
freeMonomialPrecisions = [|doubleWordPrecision, machinePrecision, machinePrecision|];
914+
polynomial = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, kernelDomain);
915+
supnormPrecision = 2^-10;
916+
sup(supnorm(polynomial, accurate, kernelDomain, relative, supnormPrecision));
917+
polynomial;
918+
```
919+
920+
* `Float32`:
921+
922+
```sollya
923+
handTuned = 1;
924+
prec = 500!;
925+
accurate = sin(pi * x);
926+
kernelDomain = [-2^-3, 2^-2];
927+
machinePrecision = 24;
928+
doubleWordPrecision = 2 * machinePrecision + handTuned;
929+
freeMonomials = [|1, 3, 5, 7|];
930+
freeMonomialPrecisions = [|doubleWordPrecision, machinePrecision, machinePrecision, machinePrecision|];
931+
polynomial = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, kernelDomain);
932+
supnormPrecision = 2^-10;
933+
sup(supnorm(polynomial, accurate, kernelDomain, relative, supnormPrecision));
934+
polynomial;
935+
```
936+
937+
* `Float64`:
938+
939+
```sollya
940+
handTuned = 3;
941+
prec = 500!;
942+
accurate = sin(pi * x);
943+
kernelDomain = [-2^-3, 2^-2];
944+
machinePrecision = 53;
945+
doubleWordPrecision = 2 * machinePrecision + handTuned;
946+
freeMonomials = [|1, 3, 5, 7, 9, 11, 13, 15|];
947+
freeMonomialPrecisions = [|doubleWordPrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision|];
948+
polynomial = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, kernelDomain);
949+
supnormPrecision = 2^-10;
950+
sup(supnorm(polynomial, accurate, kernelDomain, relative, supnormPrecision));
951+
polynomial;
952+
```
953+
*/
954+
955+
const _cospi_kernel_polynomial_f16 = CosPiEvaluationScheme(;
956+
c₀ = Float16(1),
957+
c₂ = (Float16(-4.934), Float16(0.001134)),
958+
rest = (
959+
Float16(3.941),
960+
),
961+
)
962+
const _sinpi_kernel_polynomial_f16 = SinPiEvaluationScheme(;
963+
c₁ = (Float16(3.14), Float16(0.0009737)),
964+
rest = (
965+
Float16(-5.168),
966+
Float16(2.52),
967+
),
968+
)
969+
const _cospi_kernel_polynomial_f32 = CosPiEvaluationScheme(;
970+
c₀ = Float32(1),
971+
c₂ = (-4.934802f0, -1.1607644f-7),
972+
rest = (
973+
4.0587077f0,
974+
-1.3350532f0,
975+
0.23138562f0,
976+
),
977+
)
978+
const _sinpi_kernel_polynomial_f32 = SinPiEvaluationScheme(;
979+
c₁ = (3.1415927f0, -9.674135f-8),
980+
rest = (
981+
-5.167708f0,
982+
2.5497704f0,
983+
-0.58910555f0,
984+
),
985+
)
986+
const _cospi_kernel_polynomial_f64 = CosPiEvaluationScheme(;
987+
c₀ = Float64(1),
988+
c₂ = (-4.934802200544679, -2.6451348079795815e-16),
989+
rest = (
990+
4.058712126416749,
991+
-1.3352627688519947,
992+
0.2353306301924776,
993+
-0.025806885661227713,
994+
0.0019294656071154924,
995+
-0.00010356606727649327,
996+
),
997+
)
998+
const _sinpi_kernel_polynomial_f64 = SinPiEvaluationScheme(;
999+
c₁ = (3.141592653589793, 1.2267151843884804e-16),
1000+
rest = (
1001+
-5.16771278004997,
1002+
2.550164039877393,
1003+
-0.5992645293247603,
1004+
0.082145886770189,
1005+
-0.007370434116378644,
1006+
0.000466329949762989,
1007+
-2.1925990105975317e-5,
1008+
),
1009+
)
1010+
7871011
"""
7881012
sinpi(x::T) where T -> float(T)
7891013

0 commit comments

Comments
 (0)