@@ -727,18 +727,13 @@ end
727
727
728
728
# Uses minimax polynomial of sin(π * x) for π * x in [0, .25]
729
729
@inline function sinpi_kernel (x:: Float64 )
730
- sinpi_kernel_wide (x)
730
+ _sinpi_kernel_polynomial_f64 (x)
731
731
end
732
732
@inline function sinpi_kernel_wide (x:: Float64 )
733
- x² = x* x
734
- x⁴ = x²* x²
735
- r = evalpoly (x², (2.5501640398773415 , - 0.5992645293202981 , 0.08214588658006512 ,
736
- - 7.370429884921779e-3 , 4.662827319453555e-4 , - 2.1717412523382308e-5 ))
737
- return muladd (3.141592653589793 , x, x* muladd (- 5.16771278004997 ,
738
- x², muladd (x⁴, r, 1.2245907532225998e-16 )))
733
+ _sinpi_kernel_polynomial_f64 (x)
739
734
end
740
735
@inline function sinpi_kernel (x:: Float32 )
741
- Float32 ( sinpi_kernel_wide (x) )
736
+ _sinpi_kernel_polynomial_f32 (x )
742
737
end
743
738
@inline function sinpi_kernel_wide (x:: Float32 )
744
739
x = Float64 (x)
747
742
end
748
743
749
744
@inline function sinpi_kernel (x:: Float16 )
750
- Float16 ( sinpi_kernel_wide (x) )
745
+ _sinpi_kernel_polynomial_f16 (x )
751
746
end
752
747
@inline function sinpi_kernel_wide (x:: Float16 )
753
748
x = Float32 (x)
@@ -756,34 +751,263 @@ end
756
751
757
752
# Uses minimax polynomial of cos(π * x) for π * x in [0, .25]
758
753
@inline function cospi_kernel (x:: Float64 )
759
- cospi_kernel_wide (x)
754
+ _cospi_kernel_polynomial_f64 (x)
760
755
end
761
756
@inline function cospi_kernel_wide (x:: Float64 )
762
- x² = x* x
763
- r = x²* evalpoly (x², (4.058712126416765 , - 1.3352627688537357 , 0.23533063027900392 ,
764
- - 0.025806887811869204 , 1.9294917136379183e-3 , - 1.0368935675474665e-4 ))
765
- a_x² = 4.934802200544679 * x²
766
- a_x²lo = muladd (3.109686485461973e-16 , x², muladd (4.934802200544679 , x², - a_x²))
767
-
768
- w = 1.0 - a_x²
769
- return w + muladd (x², r, ((1.0 - w)- a_x²) - a_x²lo)
757
+ _cospi_kernel_polynomial_f64 (x)
770
758
end
771
759
@inline function cospi_kernel (x:: Float32 )
772
- Float32 ( cospi_kernel_wide (x) )
760
+ _cospi_kernel_polynomial_f32 (x )
773
761
end
774
762
@inline function cospi_kernel_wide (x:: Float32 )
775
763
x = Float64 (x)
776
764
return evalpoly (x* x, (1.0 , - 4.934802200541122 , 4.058712123568637 ,
777
765
- 1.3352624040152927 , 0.23531426791507182 , - 0.02550710082498761 ))
778
766
end
779
767
@inline function cospi_kernel (x:: Float16 )
780
- Float16 ( cospi_kernel_wide (x) )
768
+ _cospi_kernel_polynomial_f16 (x )
781
769
end
782
770
@inline function cospi_kernel_wide (x:: Float16 )
783
771
x = Float32 (x)
784
772
return evalpoly (x* x, (1.0f0 , - 4.934802f0 , 4.058712f0 , - 1.3352624f0 , 0.23531426f0 , - 0.0255071f0 ))
785
773
end
786
774
775
+ struct CosPiEvaluationScheme{T <: AbstractFloat , Rest <: Tuple{T, Vararg{T}} }
776
+ """
777
+ Coefficient 0 of the polynomial.
778
+ """
779
+ c₀:: T
780
+ """
781
+ Coefficient 2 of the polynomial, in double-word format (unevaluated sum of two floating-point numbers), stored as `(high, low)`.
782
+ """
783
+ c₂:: NTuple{2, T}
784
+ """
785
+ Rest of the coefficients.
786
+ """
787
+ rest:: Rest
788
+ function CosPiEvaluationScheme (;
789
+ c₀:: AbstractFloat ,
790
+ c₂:: (NTuple{2, T} where {T <: AbstractFloat}) ,
791
+ rest:: (Tuple{T, Vararg{T}} where {T <: AbstractFloat}) ,
792
+ )
793
+ new {eltype(rest), typeof(rest)} (c₀, c₂, rest)
794
+ end
795
+ end
796
+
797
+ struct SinPiEvaluationScheme{T <: AbstractFloat , Rest <: Tuple{T, Vararg{T}} }
798
+ """
799
+ Coefficient 1 of the polynomial, in double-word format (unevaluated sum of two floating-point numbers), stored as `(high, low)`.
800
+ """
801
+ c₁:: NTuple{2, T}
802
+ """
803
+ Rest of the coefficients.
804
+ """
805
+ rest:: Rest
806
+ function SinPiEvaluationScheme (;
807
+ c₁:: (NTuple{2, T} where {T <: AbstractFloat}) ,
808
+ rest:: (Tuple{T, Vararg{T}} where {T <: AbstractFloat}) ,
809
+ )
810
+ new {eltype(rest), typeof(rest)} (c₁, rest)
811
+ end
812
+ end
813
+
814
+ function (sch:: CosPiEvaluationScheme )(x:: AbstractFloat )
815
+ x² = x * x
816
+ c₀ = sch. c₀
817
+ (c₂, c₂_lo) = sch. c₂
818
+ rest = sch. rest
819
+ c₂ = - c₂
820
+ c₂_lo = - c₂_lo
821
+ r = evalpoly (x², rest) * x²
822
+ a_x² = c₂ * x²
823
+ a_x²_lo = muladd (c₂_lo, x², muladd (c₂, x², - a_x²))
824
+ w = c₀ - a_x²
825
+ w + muladd (x², r, ((c₀ - w) - a_x²) - a_x²_lo)
826
+ end
827
+
828
+ function (sch:: SinPiEvaluationScheme )(x:: AbstractFloat )
829
+ x² = x * x
830
+ (c₁, c₁_lo) = sch. c₁
831
+ (c₃, rest... ) = sch. rest
832
+ if rest === ()
833
+ r = typeof (x)(0 )
834
+ else
835
+ r = evalpoly (x², rest)
836
+ end
837
+ muladd (c₁, x, x * muladd (c₃, x², muladd (x² * x², r, c₁_lo)))
838
+ end
839
+
840
+ /*
841
+ # Polynomial approximation for `cospi` and `sinpi` kernels
842
+
843
+ # # `cospi`
844
+
845
+ Constrain the zeroth coefficient to ` 1` to achieve exact behavior for zero input.
846
+
847
+ * ` Float16` :
848
+
849
+ ``` sollya
850
+ handTuned = 1;
851
+ prec = 500!;
852
+ accurate = cos(pi * x);
853
+ kernelDomain = [-2^-3, 2^-2];
854
+ constrainedPart = 1;
855
+ machinePrecision = 11;
856
+ doubleWordPrecision = 2 * machinePrecision + handTuned;
857
+ freeMonomials = [|2, 4|];
858
+ freeMonomialPrecisions = [|doubleWordPrecision, machinePrecision|];
859
+ polynomial = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, kernelDomain, constrainedPart);
860
+ supnormPrecision = 2^-10;
861
+ sup(supnorm(polynomial, accurate, kernelDomain, relative, supnormPrecision));
862
+ polynomial;
863
+ ```
864
+
865
+ * ` Float32` :
866
+
867
+ ``` sollya
868
+ handTuned = 3;
869
+ prec = 500!;
870
+ accurate = cos(pi * x);
871
+ kernelDomain = [-2^-3, 2^-2];
872
+ constrainedPart = 1;
873
+ machinePrecision = 24;
874
+ doubleWordPrecision = 2 * machinePrecision + handTuned;
875
+ freeMonomials = [|2, 4, 6, 8|];
876
+ freeMonomialPrecisions = [|doubleWordPrecision, machinePrecision, machinePrecision, machinePrecision|];
877
+ polynomial = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, kernelDomain, constrainedPart);
878
+ supnormPrecision = 2^-10;
879
+ sup(supnorm(polynomial, accurate, kernelDomain, relative, supnormPrecision));
880
+ polynomial;
881
+ ```
882
+
883
+ * ` Float64` :
884
+
885
+ ``` sollya
886
+ handTuned = 1;
887
+ prec = 500!;
888
+ accurate = cos(pi * x);
889
+ kernelDomain = [-2^-3, 2^-2];
890
+ constrainedPart = 1;
891
+ machinePrecision = 53;
892
+ doubleWordPrecision = 2 * machinePrecision + handTuned;
893
+ freeMonomials = [|2, 4, 6, 8, 10, 12, 14|];
894
+ freeMonomialPrecisions = [|doubleWordPrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision|];
895
+ polynomial = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, kernelDomain, constrainedPart);
896
+ supnormPrecision = 2^-10;
897
+ sup(supnorm(polynomial, accurate, kernelDomain, relative, supnormPrecision));
898
+ polynomial;
899
+ ```
900
+
901
+ # # `sinpi`
902
+
903
+ * ` Float16` :
904
+
905
+ ``` sollya
906
+ handTuned = 3;
907
+ prec = 500!;
908
+ accurate = sin(pi * x);
909
+ kernelDomain = [-2^-3, 2^-2];
910
+ machinePrecision = 11;
911
+ doubleWordPrecision = 2 * machinePrecision + handTuned;
912
+ freeMonomials = [|1, 3, 5|];
913
+ freeMonomialPrecisions = [|doubleWordPrecision, machinePrecision, machinePrecision|];
914
+ polynomial = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, kernelDomain);
915
+ supnormPrecision = 2^-10;
916
+ sup(supnorm(polynomial, accurate, kernelDomain, relative, supnormPrecision));
917
+ polynomial;
918
+ ```
919
+
920
+ * ` Float32` :
921
+
922
+ ``` sollya
923
+ handTuned = 1;
924
+ prec = 500!;
925
+ accurate = sin(pi * x);
926
+ kernelDomain = [-2^-3, 2^-2];
927
+ machinePrecision = 24;
928
+ doubleWordPrecision = 2 * machinePrecision + handTuned;
929
+ freeMonomials = [|1, 3, 5, 7|];
930
+ freeMonomialPrecisions = [|doubleWordPrecision, machinePrecision, machinePrecision, machinePrecision|];
931
+ polynomial = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, kernelDomain);
932
+ supnormPrecision = 2^-10;
933
+ sup(supnorm(polynomial, accurate, kernelDomain, relative, supnormPrecision));
934
+ polynomial;
935
+ ```
936
+
937
+ * ` Float64` :
938
+
939
+ ``` sollya
940
+ handTuned = 3;
941
+ prec = 500!;
942
+ accurate = sin(pi * x);
943
+ kernelDomain = [-2^-3, 2^-2];
944
+ machinePrecision = 53;
945
+ doubleWordPrecision = 2 * machinePrecision + handTuned;
946
+ freeMonomials = [|1, 3, 5, 7, 9, 11, 13, 15|];
947
+ freeMonomialPrecisions = [|doubleWordPrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision, machinePrecision|];
948
+ polynomial = fpminimax(accurate, freeMonomials, freeMonomialPrecisions, kernelDomain);
949
+ supnormPrecision = 2^-10;
950
+ sup(supnorm(polynomial, accurate, kernelDomain, relative, supnormPrecision));
951
+ polynomial;
952
+ ```
953
+ */
954
+
955
+ const _cospi_kernel_polynomial_f16 = CosPiEvaluationScheme (;
956
+ c₀ = Float16 (1 ),
957
+ c₂ = (Float16 (- 4.934 ), Float16 (0.001134 )),
958
+ rest = (
959
+ Float16 (3.941 ),
960
+ ),
961
+ )
962
+ const _sinpi_kernel_polynomial_f16 = SinPiEvaluationScheme (;
963
+ c₁ = (Float16 (3.14 ), Float16 (0.0009737 )),
964
+ rest = (
965
+ Float16 (- 5.168 ),
966
+ Float16 (2.52 ),
967
+ ),
968
+ )
969
+ const _cospi_kernel_polynomial_f32 = CosPiEvaluationScheme (;
970
+ c₀ = Float32 (1 ),
971
+ c₂ = (- 4.934802f0 , - 1.1607644f-7 ),
972
+ rest = (
973
+ 4.0587077f0 ,
974
+ - 1.3350532f0 ,
975
+ 0.23138562f0 ,
976
+ ),
977
+ )
978
+ const _sinpi_kernel_polynomial_f32 = SinPiEvaluationScheme (;
979
+ c₁ = (3.1415927f0 , - 9.674135f-8 ),
980
+ rest = (
981
+ - 5.167708f0 ,
982
+ 2.5497704f0 ,
983
+ - 0.58910555f0 ,
984
+ ),
985
+ )
986
+ const _cospi_kernel_polynomial_f64 = CosPiEvaluationScheme (;
987
+ c₀ = Float64 (1 ),
988
+ c₂ = (- 4.934802200544679 , - 2.6451348079795815e-16 ),
989
+ rest = (
990
+ 4.058712126416749 ,
991
+ - 1.3352627688519947 ,
992
+ 0.2353306301924776 ,
993
+ - 0.025806885661227713 ,
994
+ 0.0019294656071154924 ,
995
+ - 0.00010356606727649327 ,
996
+ ),
997
+ )
998
+ const _sinpi_kernel_polynomial_f64 = SinPiEvaluationScheme (;
999
+ c₁ = (3.141592653589793 , 1.2267151843884804e-16 ),
1000
+ rest = (
1001
+ - 5.16771278004997 ,
1002
+ 2.550164039877393 ,
1003
+ - 0.5992645293247603 ,
1004
+ 0.082145886770189 ,
1005
+ - 0.007370434116378644 ,
1006
+ 0.000466329949762989 ,
1007
+ - 2.1925990105975317e-5 ,
1008
+ ),
1009
+ )
1010
+
787
1011
"""
788
1012
sinpi(x::T) where T -> float(T)
789
1013
0 commit comments