Skip to content

Commit 5e30bf6

Browse files
author
Jatin Bhateja
committed
8360116: Add support for AVX10 floating point minmax instruction
Reviewed-by: mhaessig, sviswanathan
1 parent c503705 commit 5e30bf6

File tree

7 files changed

+461
-42
lines changed

7 files changed

+461
-42
lines changed

src/hotspot/cpu/x86/assembler_x86.cpp

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8257,6 +8257,14 @@ void Assembler::vmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
82578257
emit_int16(0x5F, (0xC0 | encode));
82588258
}
82598259

8260+
// Emits the register-register form of the AVX10.2 scalar min/max instruction selected by
// the "sh" suffix (NOTE(review): presumably scalar half-precision — confirm against the spec).
// Encoding as written here: EVEX-only, no SIMD prefix, 0F 3A opcode map, vex_w = 0,
// opcode byte 0x53, then a register-direct ModRM byte and the imm8 control byte.
//   dst  - destination register
//   nds  - first source register
//   src  - second source register
//   imm8 - min/max control byte (see AVX10_MINMAX_*_COMPARE_SIGN in assembler_x86.hpp)
void Assembler::eminmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
8261+
assert(VM_Version::supports_avx10_2(), "");
8262+
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
8263+
// There is no VEX form of this instruction, so force the EVEX prefix.
attributes.set_is_evex_instruction();
8264+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes);
8265+
// opcode, ModRM (0xC0 => register-direct operands), immediate
emit_int24(0x53, (0xC0 | encode), imm8);
8266+
}
8267+
82608268
void Assembler::vminsh(XMMRegister dst, XMMRegister nds, XMMRegister src) {
82618269
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
82628270
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -8771,12 +8779,68 @@ void Assembler::vmaxps(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
87718779
emit_int16(0x5F, (0xC0 | encode));
87728780
}
87738781

8782+
// Emits the masked register-register form of the AVX10.2 packed min/max instruction used
// for T_FLOAT vectors: EVEX-only, 66 prefix, 0F 3A opcode map, vex_w = 0, opcode byte 0x52,
// then a register-direct ModRM byte and the imm8 control byte.
//   dst        - destination vector register
//   mask       - opmask register embedded in the EVEX prefix
//   nds        - first source vector register
//   src        - second source vector register
//   merge      - when true, the "clear context" attribute is reset
//                (NOTE(review): presumably selects merge-masking over zero-masking — confirm)
//   imm8       - min/max control byte (see AVX10_MINMAX_*_COMPARE_SIGN in assembler_x86.hpp)
//   vector_len - vector length encoding passed to InstructionAttr
void Assembler::evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len) {
8783+
assert(VM_Version::supports_avx10_2(), "");
8784+
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8785+
attributes.set_is_evex_instruction();
8786+
attributes.set_embedded_opmask_register_specifier(mask);
8787+
if (merge) {
8788+
attributes.reset_is_clear_context();
8789+
}
8790+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8791+
// opcode, ModRM (0xC0 => register-direct operands), immediate
emit_int24(0x52, (0xC0 | encode), imm8);
8792+
}
8793+
8794+
// Memory-operand variant of the masked AVX10.2 packed min/max emitter used for T_FLOAT
// vectors: EVEX-only, 66 prefix, 0F 3A opcode map, vex_w = 0, opcode byte 0x52, followed
// by the memory operand and the imm8 control byte.
//   dst        - destination vector register
//   mask       - opmask register embedded in the EVEX prefix
//   nds        - first source vector register
//   src        - memory address of the second source
//   merge      - when true, the "clear context" attribute is reset
//                (NOTE(review): presumably selects merge-masking over zero-masking — confirm)
//   imm8       - min/max control byte (see AVX10_MINMAX_*_COMPARE_SIGN in assembler_x86.hpp)
//   vector_len - vector length encoding passed to InstructionAttr
void Assembler::evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len) {
8795+
assert(VM_Version::supports_avx10_2(), "");
8796+
InstructionMark im(this);
8797+
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8798+
attributes.set_is_evex_instruction();
8799+
attributes.set_embedded_opmask_register_specifier(mask);
8800+
// Full-vector tuple type for the memory operand; no explicit input size is given.
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
8801+
if (merge) {
8802+
attributes.reset_is_clear_context();
8803+
}
8804+
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8805+
emit_int8(0x52);
8806+
// NOTE(review): one immediate byte is emitted after the operand; confirm that the last
// argument of emit_operand (0 here) does not need to account for that trailing byte.
emit_operand(dst, src, 0);
8807+
emit_int8(imm8);
8808+
}
8809+
87748810
// Emits the two-operand register-register maxpd form: 66 prefix, 0F opcode map,
// opcode byte 0x5F, register-direct ModRM. No separate first-source register is
// supplied (xnoreg is passed in the nds position).
void Assembler::maxpd(XMMRegister dst, XMMRegister src) {
87758811
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
87768812
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
87778813
// opcode, ModRM (0xC0 => register-direct operands)
emit_int16(0x5F, (0xC0 | encode));
87788814
}
87798815

8816+
// Emits the masked register-register form of the AVX10.2 packed min/max instruction used
// for T_DOUBLE vectors. Same encoding as evminmaxps (66 prefix, 0F 3A map, opcode 0x52)
// except that vex_w is set.
//   dst        - destination vector register
//   mask       - opmask register embedded in the EVEX prefix
//   nds        - first source vector register
//   src        - second source vector register
//   merge      - when true, the "clear context" attribute is reset
//                (NOTE(review): presumably selects merge-masking over zero-masking — confirm)
//   imm8       - min/max control byte (see AVX10_MINMAX_*_COMPARE_SIGN in assembler_x86.hpp)
//   vector_len - vector length encoding passed to InstructionAttr
void Assembler::evminmaxpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len) {
8817+
assert(VM_Version::supports_avx10_2(), "");
8818+
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
8819+
attributes.set_is_evex_instruction();
8820+
attributes.set_embedded_opmask_register_specifier(mask);
8821+
if (merge) {
8822+
attributes.reset_is_clear_context();
8823+
}
8824+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8825+
// opcode, ModRM (0xC0 => register-direct operands), immediate
emit_int24(0x52, (0xC0 | encode), imm8);
8826+
}
8827+
8828+
// Memory-operand variant of the masked AVX10.2 packed min/max emitter used for T_DOUBLE
// vectors: EVEX-only, 66 prefix, 0F 3A opcode map, vex_w = 1, opcode byte 0x52, followed
// by the memory operand and the imm8 control byte.
//   dst        - destination vector register
//   mask       - opmask register embedded in the EVEX prefix
//   nds        - first source vector register
//   src        - memory address of the second source
//   merge      - when true, the "clear context" attribute is reset
//                (NOTE(review): presumably selects merge-masking over zero-masking — confirm)
//   imm8       - min/max control byte (see AVX10_MINMAX_*_COMPARE_SIGN in assembler_x86.hpp)
//   vector_len - vector length encoding passed to InstructionAttr
void Assembler::evminmaxpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len) {
8829+
assert(VM_Version::supports_avx10_2(), "");
8830+
InstructionMark im(this);
8831+
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
8832+
attributes.set_is_evex_instruction();
8833+
attributes.set_embedded_opmask_register_specifier(mask);
8834+
// Full-vector tuple type for the memory operand; no explicit input size is given.
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
8835+
if (merge) {
8836+
attributes.reset_is_clear_context();
8837+
}
8838+
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
8839+
emit_int8(0x52);
8840+
// NOTE(review): one immediate byte is emitted after the operand; confirm that the last
// argument of emit_operand (0 here) does not need to account for that trailing byte.
emit_operand(dst, src, 0);
8841+
emit_int8(imm8);
8842+
}
8843+
87808844
void Assembler::vmaxpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
87818845
assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
87828846
InstructionAttr attributes(vector_len, /* vex_w */true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -13119,6 +13183,14 @@ void Assembler::vminss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1311913183
emit_int16(0x5D, (0xC0 | encode));
1312013184
}
1312113185

13186+
// Emits the register-register form of the AVX10.2 scalar min/max instruction selected by
// the "ss" suffix (NOTE(review): presumably scalar single-precision — confirm against the spec).
// Encoding as written here: EVEX-only, 66 prefix, 0F 3A opcode map, vex_w = 0,
// opcode byte 0x53, then a register-direct ModRM byte and the imm8 control byte.
//   dst  - destination register
//   nds  - first source register
//   src  - second source register
//   imm8 - min/max control byte (see AVX10_MINMAX_*_COMPARE_SIGN in assembler_x86.hpp)
void Assembler::eminmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
13187+
assert(VM_Version::supports_avx10_2(), "");
13188+
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
13189+
// There is no VEX form of this instruction, so force the EVEX prefix.
attributes.set_is_evex_instruction();
13190+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
13191+
// opcode, ModRM (0xC0 => register-direct operands), immediate
emit_int24(0x53, (0xC0 | encode), imm8);
13192+
}
13193+
1312213194
void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1312313195
assert(VM_Version::supports_avx(), "");
1312413196
InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -13127,6 +13199,14 @@ void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
1312713199
emit_int16(0x5D, (0xC0 | encode));
1312813200
}
1312913201

13202+
// Emits the register-register form of the AVX10.2 scalar min/max instruction selected by
// the "sd" suffix (NOTE(review): presumably scalar double-precision — confirm against the spec).
// Same encoding as eminmaxss (66 prefix, 0F 3A map, opcode 0x53) except that vex_w is set.
//   dst  - destination register
//   nds  - first source register
//   src  - second source register
//   imm8 - min/max control byte (see AVX10_MINMAX_*_COMPARE_SIGN in assembler_x86.hpp)
void Assembler::eminmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
13203+
assert(VM_Version::supports_avx10_2(), "");
13204+
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
13205+
// There is no VEX form of this instruction, so force the EVEX prefix.
attributes.set_is_evex_instruction();
13206+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
13207+
// opcode, ModRM (0xC0 => register-direct operands), immediate
emit_int24(0x53, (0xC0 | encode), imm8);
13208+
}
13209+
1313013210
void Assembler::vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
1313113211
assert(VM_Version::supports_avx(), "");
1313213212
assert(vector_len <= AVX_256bit, "");
@@ -16526,6 +16606,34 @@ void Assembler::evminph(XMMRegister dst, XMMRegister nds, Address src, int vecto
1652616606
emit_operand(dst, src, 0);
1652716607
}
1652816608

16609+
// Emits the masked register-register form of the AVX10.2 packed min/max instruction
// selected by the "ph" suffix (NOTE(review): presumably packed half-precision — confirm).
// Encoding as written here: EVEX-only, no SIMD prefix, 0F 3A opcode map, vex_w = 0,
// opcode byte 0x52, then a register-direct ModRM byte and the imm8 control byte.
//   dst        - destination vector register
//   mask       - opmask register embedded in the EVEX prefix
//   nds        - first source vector register
//   src        - second source vector register
//   merge      - when true, the "clear context" attribute is reset
//                (NOTE(review): presumably selects merge-masking over zero-masking — confirm)
//   imm8       - min/max control byte (see AVX10_MINMAX_*_COMPARE_SIGN in assembler_x86.hpp)
//   vector_len - vector length encoding passed to InstructionAttr
void Assembler::evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len) {
16610+
assert(VM_Version::supports_avx10_2(), "");
16611+
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
16612+
attributes.set_is_evex_instruction();
16613+
attributes.set_embedded_opmask_register_specifier(mask);
16614+
if (merge) {
16615+
attributes.reset_is_clear_context();
16616+
}
16617+
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes);
16618+
// opcode, ModRM (0xC0 => register-direct operands), immediate
emit_int24(0x52, (0xC0 | encode), imm8);
16619+
}
16620+
16621+
// Memory-operand variant of the masked AVX10.2 packed min/max emitter selected by the
// "ph" suffix (NOTE(review): presumably packed half-precision — confirm).
// Encoding as written here: EVEX-only, no SIMD prefix, 0F 3A opcode map, vex_w = 0,
// opcode byte 0x52, followed by the memory operand and the imm8 control byte.
//   dst        - destination vector register
//   mask       - opmask register embedded in the EVEX prefix
//   nds        - first source vector register
//   src        - memory address of the second source
//   merge      - when true, the "clear context" attribute is reset
//                (NOTE(review): presumably selects merge-masking over zero-masking — confirm)
//   imm8       - min/max control byte (see AVX10_MINMAX_*_COMPARE_SIGN in assembler_x86.hpp)
//   vector_len - vector length encoding passed to InstructionAttr
void Assembler::evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len) {
16622+
assert(VM_Version::supports_avx10_2(), "");
16623+
InstructionMark im(this);
16624+
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
16625+
attributes.set_is_evex_instruction();
16626+
attributes.set_embedded_opmask_register_specifier(mask);
16627+
if (merge) {
16628+
attributes.reset_is_clear_context();
16629+
}
16630+
// Full-vector tuple type for the memory operand; no explicit input size is given.
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
16631+
vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes);
16632+
emit_int8(0x52);
16633+
// NOTE(review): one immediate byte is emitted after the operand; confirm that the last
// argument of emit_operand (0 here) does not need to account for that trailing byte.
emit_operand(dst, src, 0);
16634+
emit_int8(imm8);
16635+
}
16636+
1652916637
void Assembler::evmaxph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
1653016638
assert(VM_Version::supports_avx512_fp16(), "requires AVX512-FP16");
1653116639
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "");

src/hotspot/cpu/x86/assembler_x86.hpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,17 @@ class InstructionAttr;
441441
// See fxsave and xsave(EVEX enabled) documentation for layout
442442
const int FPUStateSizeInWords = 2688 / wordSize;
443443

444+
445+
// AVX10 new minmax instruction control mask encoding.
446+
//
447+
// imm8[4] = 0 (please refer to Table 11.1 of section 11.2 of AVX10 manual[1] for details)
448+
// imm8[3:2] (sign control) = 01 (select sign, please refer to Table 11.5 of section 11.2 of AVX10 manual[1] for details)
449+
// imm8[1:0] = 00 (min) / 01 (max)
450+
//
451+
// [1] https://www.intel.com/content/www/us/en/content-details/856721/intel-advanced-vector-extensions-10-2-intel-avx10-2-architecture-specification.html?wapkw=AVX10
452+
const int AVX10_MINMAX_MAX_COMPARE_SIGN = 0x5;
453+
const int AVX10_MINMAX_MIN_COMPARE_SIGN = 0x4;
454+
444455
// The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
445456
// level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
446457
// is what you get. The Assembler is generating code into a CodeBuffer.
@@ -2745,6 +2756,17 @@ class Assembler : public AbstractAssembler {
27452756
void minpd(XMMRegister dst, XMMRegister src);
27462757
void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
27472758

2759+
// AVX10.2 floating point minmax instructions
2760+
void eminmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
2761+
void eminmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
2762+
void eminmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
2763+
void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len);
2764+
void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len);
2765+
void evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len);
2766+
void evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len);
2767+
void evminmaxpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len);
2768+
void evminmaxpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len);
2769+
27482770
// Maximum of packed integers
27492771
void pmaxsb(XMMRegister dst, XMMRegister src);
27502772
void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);

src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,6 +1230,21 @@ void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt,
12301230
}
12311231
}
12321232

1233+
// AVX10.2 overload of vminmax_fp: emits a single packed min/max instruction instead of
// the multi-temporary sequence used by the overload that takes tmp/atmp/btmp.
//   opc      - Op_MinV / Op_MinReductionV select min; Op_MaxV / Op_MaxReductionV select max
//   elem_bt  - T_FLOAT or T_DOUBLE
//   dst      - destination vector register
//   mask     - opmask register forwarded to the emitter (callers in this file pass k0)
//   src1     - first source vector register
//   src2     - second source vector register
//   vlen_enc - vector length encoding
void C2_MacroAssembler::vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
1234+
XMMRegister src1, XMMRegister src2, int vlen_enc) {
1235+
assert(opc == Op_MinV || opc == Op_MinReductionV ||
1236+
opc == Op_MaxV || opc == Op_MaxReductionV, "sanity");
1237+
1238+
// Translate the ideal opcode into the instruction's imm8 control byte.
int imm8 = (opc == Op_MinV || opc == Op_MinReductionV) ? AVX10_MINMAX_MIN_COMPARE_SIGN
1239+
: AVX10_MINMAX_MAX_COMPARE_SIGN;
1240+
if (elem_bt == T_FLOAT) {
1241+
// merge is always passed as true here.
evminmaxps(dst, mask, src1, src2, true, imm8, vlen_enc);
1242+
} else {
1243+
assert(elem_bt == T_DOUBLE, "");
1244+
evminmaxpd(dst, mask, src1, src2, true, imm8, vlen_enc);
1245+
}
1246+
}
1247+
12331248
// Float/Double signum
12341249
void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero, XMMRegister one) {
12351250
assert(opcode == Op_SignumF || opcode == Op_SignumD, "sanity");
@@ -2537,12 +2552,21 @@ void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_vali
25372552
} else { // i = [0,1]
25382553
vpermilps(wtmp, wsrc, permconst[i], vlen_enc);
25392554
}
2540-
vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
2555+
2556+
if (VM_Version::supports_avx10_2()) {
2557+
vminmax_fp(opcode, T_FLOAT, wdst, k0, wtmp, wsrc, vlen_enc);
2558+
} else {
2559+
vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
2560+
}
25412561
wsrc = wdst;
25422562
vlen_enc = Assembler::AVX_128bit;
25432563
}
25442564
if (is_dst_valid) {
2545-
vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit);
2565+
if (VM_Version::supports_avx10_2()) {
2566+
vminmax_fp(opcode, T_FLOAT, dst, k0, wdst, dst, Assembler::AVX_128bit);
2567+
} else {
2568+
vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit);
2569+
}
25462570
}
25472571
}
25482572

@@ -2568,12 +2592,23 @@ void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_val
25682592
assert(i == 0, "%d", i);
25692593
vpermilpd(wtmp, wsrc, 1, vlen_enc);
25702594
}
2571-
vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
2595+
2596+
if (VM_Version::supports_avx10_2()) {
2597+
vminmax_fp(opcode, T_DOUBLE, wdst, k0, wtmp, wsrc, vlen_enc);
2598+
} else {
2599+
vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
2600+
}
2601+
25722602
wsrc = wdst;
25732603
vlen_enc = Assembler::AVX_128bit;
25742604
}
2605+
25752606
if (is_dst_valid) {
2576-
vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit);
2607+
if (VM_Version::supports_avx10_2()) {
2608+
vminmax_fp(opcode, T_DOUBLE, dst, k0, wdst, dst, Assembler::AVX_128bit);
2609+
} else {
2610+
vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit);
2611+
}
25772612
}
25782613
}
25792614

src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@
7272
XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
7373
int vlen_enc);
7474

75+
void vminmax_fp(int opc, BasicType elem_bt, XMMRegister dst, KRegister mask,
76+
XMMRegister src1, XMMRegister src2, int vlen_enc);
77+
7578
void vpuminmaxq(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2, XMMRegister xtmp1, XMMRegister xtmp2, int vlen_enc);
7679

7780
void evminmax_fp(int opcode, BasicType elem_bt,

src/hotspot/cpu/x86/macroAssembler_x86.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8841,6 +8841,10 @@ void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XM
88418841
evpminsd(dst, mask, nds, src, merge, vector_len); break;
88428842
case T_LONG:
88438843
evpminsq(dst, mask, nds, src, merge, vector_len); break;
8844+
case T_FLOAT:
8845+
evminmaxps(dst, mask, nds, src, merge, AVX10_MINMAX_MIN_COMPARE_SIGN, vector_len); break;
8846+
case T_DOUBLE:
8847+
evminmaxpd(dst, mask, nds, src, merge, AVX10_MINMAX_MIN_COMPARE_SIGN, vector_len); break;
88448848
default:
88458849
fatal("Unexpected type argument %s", type2name(type)); break;
88468850
}
@@ -8856,6 +8860,10 @@ void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XM
88568860
evpmaxsd(dst, mask, nds, src, merge, vector_len); break;
88578861
case T_LONG:
88588862
evpmaxsq(dst, mask, nds, src, merge, vector_len); break;
8863+
case T_FLOAT:
8864+
evminmaxps(dst, mask, nds, src, merge, AVX10_MINMAX_MAX_COMPARE_SIGN, vector_len); break;
8865+
case T_DOUBLE:
8866+
evminmaxpd(dst, mask, nds, src, merge, AVX10_MINMAX_MAX_COMPARE_SIGN, vector_len); break;
88598867
default:
88608868
fatal("Unexpected type argument %s", type2name(type)); break;
88618869
}
@@ -8871,6 +8879,10 @@ void MacroAssembler::evpmins(BasicType type, XMMRegister dst, KRegister mask, XM
88718879
evpminsd(dst, mask, nds, src, merge, vector_len); break;
88728880
case T_LONG:
88738881
evpminsq(dst, mask, nds, src, merge, vector_len); break;
8882+
case T_FLOAT:
8883+
evminmaxps(dst, mask, nds, src, merge, AVX10_MINMAX_MIN_COMPARE_SIGN, vector_len); break;
8884+
case T_DOUBLE:
8885+
evminmaxpd(dst, mask, nds, src, merge, AVX10_MINMAX_MIN_COMPARE_SIGN, vector_len); break;
88748886
default:
88758887
fatal("Unexpected type argument %s", type2name(type)); break;
88768888
}
@@ -8886,6 +8898,10 @@ void MacroAssembler::evpmaxs(BasicType type, XMMRegister dst, KRegister mask, XM
88868898
evpmaxsd(dst, mask, nds, src, merge, vector_len); break;
88878899
case T_LONG:
88888900
evpmaxsq(dst, mask, nds, src, merge, vector_len); break;
8901+
case T_FLOAT:
8902+
evminmaxps(dst, mask, nds, src, merge, AVX10_MINMAX_MAX_COMPARE_SIGN, vector_len); break;
8903+
// T_DOUBLE must dispatch to the packed-double emitter, as the sibling evpmins/evpmaxs
// switches do; evminmaxps is the T_FLOAT form and would operate on the wrong element width.
case T_DOUBLE:
8904+
evminmaxpd(dst, mask, nds, src, merge, AVX10_MINMAX_MAX_COMPARE_SIGN, vector_len); break;
88898905
default:
88908906
fatal("Unexpected type argument %s", type2name(type)); break;
88918907
}

0 commit comments

Comments
 (0)