|
129 | 129 | * when these conditions are not met VML functions may produce incorrect output
|
130 | 130 | */
|
/*
 * DISJOINT_OR_SAME(in, out, count, elsize)
 *
 * Evaluates to non-zero when `in` and `out` are the same address, or when
 * one `count * elsize` span ends strictly before the other one starts.
 * Spans that merely touch (the end of one equals the start of the other)
 * evaluate to zero.
 *
 * Every argument is expanded more than once, so pass side-effect-free
 * expressions only.
 */
#define DISJOINT_OR_SAME(in, out, count, elsize) \
    ( ((in) == (out)) || \
      ((out) + (count)*(elsize) < (in)) || \
      ((in) + (count)*(elsize) < (out)) )
|
/*
 * DISJOINT_OR_SAME_TWO_DTYPES(in, out, count, in_size, out_size)
 *
 * Variant of the overlap check for loops whose two operands have different
 * element sizes: `in` spans `count * in_size` and `out` spans
 * `count * out_size`. Evaluates to non-zero when the addresses are equal or
 * when one span ends strictly before the other starts; touching spans
 * evaluate to zero.
 *
 * Every argument is expanded more than once, so pass side-effect-free
 * expressions only.
 *
 * NOTE(review): equal start addresses are accepted even though the element
 * sizes differ -- confirm the vectorized callee tolerates that layout.
 */
#define DISJOINT_OR_SAME_TWO_DTYPES(in, out, count, in_size, out_size) \
    ( ((in) == (out)) || \
      ((out) + (count)*(out_size) < (in)) || \
      ((in) + (count)*(in_size) < (out)) )
132 | 133 |
|
133 | 134 | /*
|
134 | 135 | * include vectorized functions and dispatchers
|
@@ -316,8 +317,7 @@ mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *st
|
316 | 317 | can_vectorize
|
317 | 318 | ,
|
318 | 319 | const @type@ in1 = *(@type@ *)ip1;
|
319 |
| - const int invalid_cases = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; |
320 |
| - ignore_fpstatus |= (invalid_cases ? 1 : 0); |
| 320 | + ignore_fpstatus = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; |
321 | 321 | *(@type@ *)op1 = @scalarf@(in1);
|
322 | 322 | )
|
323 | 323 | }
|
@@ -355,8 +355,7 @@ mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *s
|
355 | 355 | can_vectorize
|
356 | 356 | ,
|
357 | 357 | const @type@ in1 = *(@type@ *)ip1;
|
358 |
| - const int invalid_cases = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; |
359 |
| - ignore_fpstatus |= (invalid_cases ? 1 : 0); |
| 358 | + ignore_fpstatus = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY; |
360 | 359 | *(@type@ *)op1 = @scalarf@(in1);
|
361 | 360 | )
|
362 | 361 | }
|
@@ -493,8 +492,7 @@ mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *s
|
493 | 492 | can_vectorize
|
494 | 493 | ,
|
495 | 494 | const @type@ in1 = *(@type@ *)ip1;
|
496 |
| - const int invalid_cases = in1 < 0 || in1 == 0 || npy_isnan(in1) || in1 == -NPY_INFINITY; |
497 |
| - ignore_fpstatus |= (invalid_cases ? 1 : 0); |
| 495 | + ignore_fpstatus = in1 < 0 || in1 == 0 || npy_isnan(in1) || in1 == -NPY_INFINITY; |
498 | 496 | *(@type@ *)op1 = @scalarf@(in1);
|
499 | 497 | )
|
500 | 498 | }
|
@@ -2124,10 +2122,9 @@ mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_in
|
2124 | 2122 | BINARY_LOOP {
|
2125 | 2123 | const @type@ in1 = *(@type@ *)ip1;
|
2126 | 2124 | const @type@ in2 = *(@type@ *)ip2;
|
2127 |
| - int invalid_cases = !npy_isnan(in1) && in2 == 0; |
2128 |
| - invalid_cases |= (in1 == NPY_INFINITY || in1 == -NPY_INFINITY) && !npy_isnan(in2); |
2129 |
| - invalid_cases |= (in1 != NPY_INFINITY && in1 != -NPY_INFINITY) && (in2 == NPY_INFINITY || in2 == -NPY_INFINITY); |
2130 |
| - ignore_fpstatus |= (invalid_cases ? 1 : 0); |
| 2125 | + ignore_fpstatus = !npy_isnan(in1) && in2 == 0; |
| 2126 | + ignore_fpstatus |= (in1 == NPY_INFINITY || in1 == -NPY_INFINITY) && !npy_isnan(in2); |
| 2127 | + ignore_fpstatus |= (in1 != NPY_INFINITY && in1 != -NPY_INFINITY) && (in2 == NPY_INFINITY || in2 == -NPY_INFINITY); |
2131 | 2128 | divmod@c@(in1, in2, (@type@ *)op1);
|
2132 | 2129 | }
|
2133 | 2130 | }
|
@@ -2376,10 +2373,10 @@ mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_i
|
2376 | 2373 | * complex types
|
2377 | 2374 | * #TYPE = CFLOAT, CDOUBLE#
|
2378 | 2375 | * #ftype = npy_float, npy_double#
|
| 2376 | + * #type = npy_cfloat, npy_cdouble# |
2379 | 2377 | * #c = f, #
|
2380 |
| - * #C = F, # |
2381 |
| - * #s = s, d# |
2382 |
| - * #SUPPORTED_BY_VML = 1, 1# |
| 2378 | + * #C = F, # |
| 2379 | + * #s = c, z# |
2383 | 2380 | */
|
2384 | 2381 |
|
2385 | 2382 | /* similar to pairwise sum of real floats */
|
@@ -2659,44 +2656,47 @@ mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_i
|
2659 | 2656 | }
|
2660 | 2657 | }
|
2661 | 2658 |
|
2662 |
| -/* TODO: USE MKL */ |
2663 | 2659 | void
|
2664 | 2660 | mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) {
|
2665 |
| - UNARY_LOOP { |
2666 |
| - const @ftype@ in1r = ((@ftype@ *)ip1)[0]; |
2667 |
| - const @ftype@ in1i = ((@ftype@ *)ip1)[1]; |
2668 |
| - ((@ftype@ *)op1)[0] = in1r; |
2669 |
| - ((@ftype@ *)op1)[1] = -in1i; |
2670 |
| - } |
| 2661 | + const int contig = IS_UNARY_CONT(@type@, @type@); |
| 2662 | + const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@)); |
| 2663 | + const int can_vectorize = contig && disjoint_or_same; |
| 2664 | + |
| 2665 | + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) { |
| 2666 | + CHUNKED_VML_CALL2(v@s@Conj, dimensions[0], @type@, args[0], args[1]); |
| 2667 | + /* v@s@Conj(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ |
| 2668 | + } else { |
| 2669 | + UNARY_LOOP { |
| 2670 | + const @ftype@ in1r = ((@ftype@ *)ip1)[0]; |
| 2671 | + const @ftype@ in1i = ((@ftype@ *)ip1)[1]; |
| 2672 | + ((@ftype@ *)op1)[0] = in1r; |
| 2673 | + ((@ftype@ *)op1)[1] = -in1i; |
| 2674 | + } |
| 2675 | + } |
2671 | 2676 | }
|
2672 | 2677 |
|
2673 |
| -/* TODO: USE MKL */ |
2674 | 2678 | void
|
2675 | 2679 | mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
|
2676 | 2680 | {
|
| 2681 | + const int contig = IS_UNARY_CONT(@type@, @ftype@); |
| 2682 | + const int disjoint_or_same = DISJOINT_OR_SAME_TWO_DTYPES(args[0], args[1], dimensions[0], sizeof(@type@), sizeof(@ftype@)); |
| 2683 | + const int can_vectorize = contig && disjoint_or_same; |
2677 | 2684 | int ignore_fpstatus = 0;
|
2678 |
| - |
2679 |
| - // FIXME: abs function VML for complex numbers breaks FFT test_basic.py |
2680 |
| - //if(steps[0]/2 == sizeof(@ftype@) && steps[1] == sizeof(@ftype@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) { |
2681 |
| -#if @SUPPORTED_BY_VML@ |
2682 |
| - if(0 == 1) { |
| 2685 | + |
| 2686 | + if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) { |
2683 | 2687 | ignore_fpstatus = 1;
|
2684 |
| - CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @ftype@, args[0], args[1]); |
2685 |
| - /* v@s@Abs(dimensions[0], (@ftype@ *) args[0], (@ftype@ *) args[1]); */ |
2686 |
| - } else |
2687 |
| -#endif |
2688 |
| - { |
| 2688 | + CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @type@, args[0], args[1]); |
| 2689 | + /* v@s@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */ |
| 2690 | + } else { |
2689 | 2691 | UNARY_LOOP {
|
2690 | 2692 | const @ftype@ in1r = ((@ftype@ *)ip1)[0];
|
2691 | 2693 | const @ftype@ in1i = ((@ftype@ *)ip1)[1];
|
2692 |
| - if(in1r == 0.0 && in1i == 0.0){ |
2693 |
| - ignore_fpstatus = 1; |
2694 |
| - } |
| 2694 | + ignore_fpstatus = npy_isnan(in1r) && npy_isnan(in1i); |
2695 | 2695 | *((@ftype@ *)op1) = hypot@c@(in1r, in1i);
|
2696 | 2696 | }
|
2697 | 2697 | }
|
2698 | 2698 | if(ignore_fpstatus) {
|
2699 |
| - feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INVALID); |
| 2699 | + feclearexcept(FE_INVALID); |
2700 | 2700 | }
|
2701 | 2701 | }
|
2702 | 2702 |
|
|
0 commit comments