|
129 | 129 | * when these conditions are not met, VML functions may produce incorrect output
|
130 | 130 | */
|
131 | 131 | #define DISJOINT_OR_SAME(p1, p2, n, s) (((p1) == (p2)) || ((p2) + (n)*(s) < (p1)) || ((p1) + (n)*(s) < (p2)) )
|
| 132 | +#define DISJOINT_OR_SAME_TWO_DTYPES(p1, p2, n, s1, s2) (((p1) == (p2)) || ((p2) + (n)*(s2) < (p1)) || ((p1) + (n)*(s1) < (p2)) ) |
132 | 133 |
|
133 | 134 | /*
|
134 | 135 | * include vectorized functions and dispatchers
|
@@ -317,7 +318,7 @@ mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *st
|
317 | 318 | ,
|
318 | 319 | const @type@ in1 = *(@type@ *)ip1;
|
319 | 320 | const int invalid_cases = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY;
|
320 |
| - ignore_fpstatus |= (invalid_cases ? 1 : 0); |
| 321 | + ignore_fpstatus |= invalid_cases; |
321 | 322 | *(@type@ *)op1 = @scalarf@(in1);
|
322 | 323 | )
|
323 | 324 | }
|
@@ -356,7 +357,7 @@ mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *s
|
356 | 357 | ,
|
357 | 358 | const @type@ in1 = *(@type@ *)ip1;
|
358 | 359 | const int invalid_cases = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY;
|
359 |
| - ignore_fpstatus |= (invalid_cases ? 1 : 0); |
| 360 | + ignore_fpstatus |= invalid_cases; |
360 | 361 | *(@type@ *)op1 = @scalarf@(in1);
|
361 | 362 | )
|
362 | 363 | }
|
@@ -494,7 +495,7 @@ mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *s
|
494 | 495 | ,
|
495 | 496 | const @type@ in1 = *(@type@ *)ip1;
|
496 | 497 | const int invalid_cases = in1 < 0 || in1 == 0 || npy_isnan(in1) || in1 == -NPY_INFINITY;
|
497 |
| - ignore_fpstatus |= (invalid_cases ? 1 : 0); |
| 498 | + ignore_fpstatus |= invalid_cases; |
498 | 499 | *(@type@ *)op1 = @scalarf@(in1);
|
499 | 500 | )
|
500 | 501 | }
|
@@ -2127,7 +2128,7 @@ mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_in
|
2127 | 2128 | int invalid_cases = !npy_isnan(in1) && in2 == 0;
|
2128 | 2129 | invalid_cases |= (in1 == NPY_INFINITY || in1 == -NPY_INFINITY) && !npy_isnan(in2);
|
2129 | 2130 | invalid_cases |= (in1 != NPY_INFINITY && in1 != -NPY_INFINITY) && (in2 == NPY_INFINITY || in2 == -NPY_INFINITY);
|
2130 |
| - ignore_fpstatus |= (invalid_cases ? 1 : 0); |
| 2131 | + ignore_fpstatus |= invalid_cases; |
2131 | 2132 | divmod@c@(in1, in2, (@type@ *)op1);
|
2132 | 2133 | }
|
2133 | 2134 | }
|
@@ -2376,10 +2377,10 @@ mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_i
|
2376 | 2377 | * complex types
|
2377 | 2378 | * #TYPE = CFLOAT, CDOUBLE#
|
2378 | 2379 | * #ftype = npy_float, npy_double#
|
| 2380 | + * #type = npy_cfloat, npy_cdouble# |
2379 | 2381 | * #c = f, #
|
2380 |
| - * #C = F, # |
2381 |
| - * #s = s, d# |
2382 |
| - * #SUPPORTED_BY_VML = 1, 1# |
| 2382 | + * #C = F, # |
| 2383 | + * #s = c, z# |
2383 | 2384 | */
|
2384 | 2385 |
|
2385 | 2386 | /* similar to pairwise sum of real floats */
|
@@ -2659,44 +2660,46 @@ mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_i
|
2659 | 2660 | }
|
2660 | 2661 | }
|
2661 | 2662 |
|
2662 |
| -/* TODO: USE MKL */ |
2663 | 2663 | void
|
2664 | 2664 | mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) {
|
2665 |
| - UNARY_LOOP { |
2666 |
| - const @ftype@ in1r = ((@ftype@ *)ip1)[0]; |
2667 |
| - const @ftype@ in1i = ((@ftype@ *)ip1)[1]; |
2668 |
| - ((@ftype@ *)op1)[0] = in1r; |
2669 |
| - ((@ftype@ *)op1)[1] = -in1i; |
| 2665 | + const int contig = IS_UNARY_CONT(@type@, @type@); |
| 2666 | + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); |
| 2667 | + const int can_vectorize = contig && disjoint_or_same; |
| 2668 | + |
| 2669 | + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) { |
| 2670 | + CHUNKED_VML_CALL2(v@s@Conj, dimensions[0], @type@, args[0], args[1]); |
| 2671 | + /* v@s@Conj(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ |
| 2672 | + } else { |
| 2673 | + UNARY_LOOP { |
| 2674 | + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; |
| 2675 | + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; |
| 2676 | + ((@ftype@ *)op1)[0] = in1r; |
| 2677 | + ((@ftype@ *)op1)[1] = -in1i; |
| 2678 | + } |
2670 | 2679 | }
|
2671 | 2680 | }
|
2672 | 2681 |
|
2673 |
| -/* TODO: USE MKL */ |
2674 | 2682 | void
|
2675 | 2683 | mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
|
2676 | 2684 | {
|
| 2685 | + const int contig = IS_UNARY_CONT(@type@, @ftype@); |
| 2686 | + const int disjoint_or_same = DISJOINT_OR_SAME_TWO_DTYPES(args[0], args[1], dimensions[0], sizeof(@type@), sizeof(@ftype@)); |
| 2687 | + const int can_vectorize = contig && disjoint_or_same; |
2677 | 2688 | int ignore_fpstatus = 0;
|
2678 |
| - |
2679 |
| - // FIXME: abs function VML for complex numbers breaks FFT test_basic.py |
2680 |
| - //if(steps[0]/2 == sizeof(@ftype@) && steps[1] == sizeof(@ftype@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) { |
2681 |
| -#if @SUPPORTED_BY_VML@ |
2682 |
| - if(0 == 1) { |
| 2689 | + |
| 2690 | + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) { |
2683 | 2691 | ignore_fpstatus = 1;
|
2684 |
| - CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @ftype@, args[0], args[1]); |
2685 |
| - /* v@s@Abs(dimensions[0], (@ftype@ *) args[0], (@ftype@ *) args[1]); */ |
2686 |
| - } else |
2687 |
| -#endif |
2688 |
| - { |
| 2692 | + CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @type@, args[0], args[1]); |
| 2693 | + /* v@s@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ |
| 2694 | + } else { |
2689 | 2695 | UNARY_LOOP {
|
2690 | 2696 | const @ftype@ in1r = ((@ftype@ *)ip1)[0];
|
2691 | 2697 | const @ftype@ in1i = ((@ftype@ *)ip1)[1];
|
2692 |
| - if(in1r == 0.0 && in1i == 0.0){ |
2693 |
| - ignore_fpstatus = 1; |
2694 |
| - } |
2695 | 2698 | *((@ftype@ *)op1) = hypot@c@(in1r, in1i);
|
2696 | 2699 | }
|
2697 | 2700 | }
|
2698 | 2701 | if(ignore_fpstatus) {
|
2699 |
| - feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INVALID); |
| 2702 | + feclearexcept(FE_INVALID); |
2700 | 2703 | }
|
2701 | 2704 | }
|
2702 | 2705 |
|
|
0 commit comments