Skip to content

Commit 35efea5

Browse files
authored
add new complex vm functions (#86)
* add new complex vm functions * address comments * use npy_hypot * Revert "use npy_hypot" This reverts commit 9bff9c1.
1 parent ac0e478 commit 35efea5

File tree

2 files changed

+33
-29
lines changed

2 files changed

+33
-29
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88

99
### Added
1010
* Added mkl implementation for floating point data-types of `exp2`, `log2`, `fabs`, `copysign`, `nextafter`, `fmax`, `fmin` and `remainder` functions [gh-81](https://github.com/IntelPython/mkl_umath/pull/81)
11+
* Added mkl implementation for complex data-types of `conjugate` and `absolute` functions [gh-86](https://github.com/IntelPython/mkl_umath/pull/86)
1112

12-
## [0.2.0] (06/DD/2025)
13+
## [0.2.0] (06/03/2025)
1314
This release updates `mkl_umath` to be aligned with both numpy-1.26.x and numpy-2.x.x.
1415

1516
### Added

mkl_umath/src/mkl_umath_loops.c.src

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@
129129
* when these conditions are not met VML functions may produce incorrect output
130130
*/
131131
#define DISJOINT_OR_SAME(p1, p2, n, s) (((p1) == (p2)) || ((p2) + (n)*(s) < (p1)) || ((p1) + (n)*(s) < (p2)) )
132+
#define DISJOINT_OR_SAME_TWO_DTYPES(p1, p2, n, s1, s2) (((p1) == (p2)) || ((p2) + (n)*(s2) < (p1)) || ((p1) + (n)*(s1) < (p2)) )
132133

133134
/*
134135
* include vectorized functions and dispatchers
@@ -317,7 +318,7 @@ mkl_umath_@TYPE@_exp(char **args, const npy_intp *dimensions, const npy_intp *st
317318
,
318319
const @type@ in1 = *(@type@ *)ip1;
319320
const int invalid_cases = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY;
320-
ignore_fpstatus |= (invalid_cases ? 1 : 0);
321+
ignore_fpstatus |= invalid_cases;
321322
*(@type@ *)op1 = @scalarf@(in1);
322323
)
323324
}
@@ -356,7 +357,7 @@ mkl_umath_@TYPE@_exp2(char **args, const npy_intp *dimensions, const npy_intp *s
356357
,
357358
const @type@ in1 = *(@type@ *)ip1;
358359
const int invalid_cases = npy_isnan(in1) || in1 == NPY_INFINITY || in1 == -NPY_INFINITY;
359-
ignore_fpstatus |= (invalid_cases ? 1 : 0);
360+
ignore_fpstatus |= invalid_cases;
360361
*(@type@ *)op1 = @scalarf@(in1);
361362
)
362363
}
@@ -494,7 +495,7 @@ mkl_umath_@TYPE@_log2(char **args, const npy_intp *dimensions, const npy_intp *s
494495
,
495496
const @type@ in1 = *(@type@ *)ip1;
496497
const int invalid_cases = in1 < 0 || in1 == 0 || npy_isnan(in1) || in1 == -NPY_INFINITY;
497-
ignore_fpstatus |= (invalid_cases ? 1 : 0);
498+
ignore_fpstatus |= invalid_cases;
498499
*(@type@ *)op1 = @scalarf@(in1);
499500
)
500501
}
@@ -2127,7 +2128,7 @@ mkl_umath_@TYPE@_remainder(char **args, const npy_intp *dimensions, const npy_in
21272128
int invalid_cases = !npy_isnan(in1) && in2 == 0;
21282129
invalid_cases |= (in1 == NPY_INFINITY || in1 == -NPY_INFINITY) && !npy_isnan(in2);
21292130
invalid_cases |= (in1 != NPY_INFINITY && in1 != -NPY_INFINITY) && (in2 == NPY_INFINITY || in2 == -NPY_INFINITY);
2130-
ignore_fpstatus |= (invalid_cases ? 1 : 0);
2131+
ignore_fpstatus |= invalid_cases;
21312132
divmod@c@(in1, in2, (@type@ *)op1);
21322133
}
21332134
}
@@ -2376,10 +2377,10 @@ mkl_umath_@TYPE@_ldexp_long(char **args, const npy_intp *dimensions, const npy_i
23762377
* complex types
23772378
* #TYPE = CFLOAT, CDOUBLE#
23782379
* #ftype = npy_float, npy_double#
2380+
* #type = npy_cfloat, npy_cdouble#
23792381
* #c = f, #
2380-
* #C = F, #
2381-
* #s = s, d#
2382-
* #SUPPORTED_BY_VML = 1, 1#
2382+
* #C = F, #
2383+
* #s = c, z#
23832384
*/
23842385

23852386
/* similar to pairwise sum of real floats */
@@ -2659,44 +2660,46 @@ mkl_umath_@TYPE@__ones_like(char **args, const npy_intp *dimensions, const npy_i
26592660
}
26602661
}
26612662

2662-
/* TODO: USE MKL */
26632663
void
26642664
mkl_umath_@TYPE@_conjugate(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func)) {
2665-
UNARY_LOOP {
2666-
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2667-
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2668-
((@ftype@ *)op1)[0] = in1r;
2669-
((@ftype@ *)op1)[1] = -in1i;
2665+
const int contig = IS_UNARY_CONT(@type@, @type@);
2666+
const int disjoint_or_same = DISJOINT_OR_SAME(args[0], args[1], dimensions[0], sizeof(@type@));
2667+
const int can_vectorize = contig && disjoint_or_same;
2668+
2669+
if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
2670+
CHUNKED_VML_CALL2(v@s@Conj, dimensions[0], @type@, args[0], args[1]);
2671+
/* v@s@Conj(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
2672+
} else {
2673+
UNARY_LOOP {
2674+
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2675+
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2676+
((@ftype@ *)op1)[0] = in1r;
2677+
((@ftype@ *)op1)[1] = -in1i;
2678+
}
26702679
}
26712680
}
26722681

2673-
/* TODO: USE MKL */
26742682
void
26752683
mkl_umath_@TYPE@_absolute(char **args, const npy_intp *dimensions, const npy_intp *steps, void *NPY_UNUSED(func))
26762684
{
2685+
const int contig = IS_UNARY_CONT(@type@, @ftype@);
2686+
const int disjoint_or_same = DISJOINT_OR_SAME_TWO_DTYPES(args[0], args[1], dimensions[0], sizeof(@type@), sizeof(@ftype@));
2687+
const int can_vectorize = contig && disjoint_or_same;
26772688
int ignore_fpstatus = 0;
2678-
2679-
// FIXME: abs function VML for complex numbers breaks FFT test_basic.py
2680-
//if(steps[0]/2 == sizeof(@ftype@) && steps[1] == sizeof(@ftype@) && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
2681-
#if @SUPPORTED_BY_VML@
2682-
if(0 == 1) {
2689+
2690+
if(can_vectorize && dimensions[0] > VML_TRANSCEDENTAL_THRESHOLD) {
26832691
ignore_fpstatus = 1;
2684-
CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @ftype@, args[0], args[1]);
2685-
/* v@s@Abs(dimensions[0], (@ftype@ *) args[0], (@ftype@ *) args[1]); */
2686-
} else
2687-
#endif
2688-
{
2692+
CHUNKED_VML_CALL2(v@s@Abs, dimensions[0], @type@, args[0], args[1]);
2693+
/* v@s@Abs(dimensions[0], (@type@*) args[0], (@type@*) args[1]); */
2694+
} else {
26892695
UNARY_LOOP {
26902696
const @ftype@ in1r = ((@ftype@ *)ip1)[0];
26912697
const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2692-
if(in1r == 0.0 && in1i == 0.0){
2693-
ignore_fpstatus = 1;
2694-
}
26952698
*((@ftype@ *)op1) = hypot@c@(in1r, in1i);
26962699
}
26972700
}
26982701
if(ignore_fpstatus) {
2699-
feclearexcept(FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INVALID);
2702+
feclearexcept(FE_INVALID);
27002703
}
27012704
}
27022705

0 commit comments

Comments
 (0)