Skip to content

Commit 55d3f26

Browse files
authored
Merge pull request numpy#25083 from seiko2plus/backport_24806
BUG: Backport fix build on ppc64 when the baseline set to Power9 or higher
2 parents ce1adca + 809d00d commit 55d3f26

File tree

5 files changed

+43
-22
lines changed

5 files changed

+43
-22
lines changed

meson_cpu/ppc64/meson.build

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ VSX3 = mod_features.new(
2626
'VSX3', 3, implies: VSX2, args: {'val': '-mcpu=power9', 'match': '.*[mcpu=|vsx].*'},
2727
detect: {'val': 'VSX3', 'match': 'VSX.*'},
2828
test_code: files(source_root + '/numpy/distutils/checks/cpu_vsx3.c')[0],
29+
extra_tests: {
30+
'VSX3_HALF_DOUBLE': files(source_root + '/numpy/distutils/checks/extra_vsx3_half_double.c')[0]
31+
}
2932
)
3033
VSX4 = mod_features.new(
3134
'VSX4', 4, implies: VSX3, args: {'val': '-mcpu=power10', 'match': '.*[mcpu=|vsx].*'},

numpy/core/src/common/half.hpp

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class Half final {
3636
#endif
3737
) || (
3838
std::is_same_v<T, double> &&
39-
#if defined(NPY_HAVE_AVX512FP16) || defined(NPY_HAVE_VSX3)
39+
#if defined(NPY_HAVE_AVX512FP16) || (defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX3_HALF_DOUBLE))
4040
true
4141
#else
4242
false
@@ -73,11 +73,8 @@ class Half final {
7373
#if defined(NPY_HAVE_AVX512FP16)
7474
__m128d md = _mm_load_sd(&f);
7575
bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_castph_si128(_mm_cvtpd_ph(md))));
76-
#elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
77-
__vector double vf64 = vec_splats(f);
78-
__vector unsigned short vf16;
79-
__asm__ __volatile__ ("xvcvdphp %x0,%x1" : "=wa" (vf16) : "wa" (vf64));
80-
bits_ = vec_extract(vf16, 0);
76+
#elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX3_HALF_DOUBLE)
77+
__asm__ __volatile__ ("xscvdphp %x0,%x1" : "=wa" (bits_) : "wa" (f));
8178
#else
8279
bits_ = half_private::FromDoubleBits(BitCast<uint64_t>(f));
8380
#endif
@@ -96,7 +93,7 @@ class Half final {
9693
__vector float vf32;
9794
__asm__ __volatile__("xvcvhpsp %x0,%x1"
9895
: "=wa"(vf32)
99-
: "wa"(vec_splats(bits_.u)));
96+
: "wa"(vec_splats(bits_)));
10097
return vec_extract(vf32, 0);
10198
#else
10299
return BitCast<float>(half_private::ToFloatBits(bits_));
@@ -110,12 +107,12 @@ class Half final {
110107
double ret;
111108
_mm_store_sd(&ret, _mm_cvtph_pd(_mm_castsi128_ph(_mm_cvtsi32_si128(bits_))));
112109
return ret;
113-
#elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
114-
__vector float vf64;
115-
__asm__ __volatile__("xvcvhpdp %x0,%x1"
116-
: "=wa"(vf32)
117-
: "wa"(vec_splats(bits_)));
118-
return vec_extract(vf64, 0);
110+
#elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX3_HALF_DOUBLE)
111+
double f64;
112+
__asm__ __volatile__("xscvhpdp %x0,%x1"
113+
: "=wa"(f64)
114+
: "wa"(bits_));
115+
return f64;
119116
#else
120117
return BitCast<double>(half_private::ToDoubleBits(bits_));
121118
#endif

numpy/core/tests/test_half.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,11 @@ def setup_method(self):
2121
# An array of all possible float16 values
2222
self.all_f16 = np.arange(0x10000, dtype=uint16)
2323
self.all_f16.dtype = float16
24-
self.all_f32 = np.array(self.all_f16, dtype=float32)
25-
self.all_f64 = np.array(self.all_f16, dtype=float64)
24+
25+
# NaN values can cause an invalid FP exception if HW conversion is being used
26+
with np.errstate(invalid='ignore'):
27+
self.all_f32 = np.array(self.all_f16, dtype=float32)
28+
self.all_f64 = np.array(self.all_f16, dtype=float64)
2629

2730
# An array of all non-NaN float16 values, in sorted order
2831
self.nonan_f16 = np.concatenate(
@@ -44,14 +47,19 @@ def test_half_conversions(self):
4447
# value is preserved when converting to/from other floats.
4548

4649
# Convert from float32 back to float16
47-
b = np.array(self.all_f32, dtype=float16)
48-
assert_equal(self.all_f16.view(dtype=uint16),
49-
b.view(dtype=uint16))
50+
with np.errstate(invalid='ignore'):
51+
b = np.array(self.all_f32, dtype=float16)
52+
# avoid testing NaNs, since their bits may differ between QNaNs and SNaNs
53+
b_nn = b == b
54+
assert_equal(self.all_f16[b_nn].view(dtype=uint16),
55+
b[b_nn].view(dtype=uint16))
5056

5157
# Convert from float64 back to float16
52-
b = np.array(self.all_f64, dtype=float16)
53-
assert_equal(self.all_f16.view(dtype=uint16),
54-
b.view(dtype=uint16))
58+
with np.errstate(invalid='ignore'):
59+
b = np.array(self.all_f64, dtype=float16)
60+
b_nn = b == b
61+
assert_equal(self.all_f16[b_nn].view(dtype=uint16),
62+
b[b_nn].view(dtype=uint16))
5563

5664
# Convert float16 to longdouble and back
5765
# This doesn't necessarily preserve the extra NaN bits,

numpy/distutils/ccompiler_opt.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,8 @@ class _Config:
301301
## Power8/ISA 2.07
302302
VSX2 = dict(interest=2, implies="VSX", implies_detect=False),
303303
## Power9/ISA 3.00
304-
VSX3 = dict(interest=3, implies="VSX2", implies_detect=False),
304+
VSX3 = dict(interest=3, implies="VSX2", implies_detect=False,
305+
extra_checks="VSX3_HALF_DOUBLE"),
305306
## Power10/ISA 3.1
306307
VSX4 = dict(interest=4, implies="VSX3", implies_detect=False,
307308
extra_checks="VSX4_MMA"),

numpy/distutils/checks/extra_vsx3_half_double.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/**
2+
* Assembler may not fully support the following VSX3 scalar
3+
* instructions, even though compilers report VSX3 support.
4+
*/
5+
int main(void)
6+
{
7+
unsigned short bits = 0xFF;
8+
double f;
9+
__asm__ __volatile__("xscvhpdp %x0,%x1" : "=wa"(f) : "wa"(bits));
10+
__asm__ __volatile__ ("xscvdphp %x0,%x1" : "=wa" (bits) : "wa" (f));
11+
return bits;
12+
}

0 commit comments

Comments
 (0)