Skip to content

Commit d75ea7e

Browse files
author
Xiaohong Gong
committed
8355563: VectorAPI: Refactor current implementation of subword gather load API
Reviewed-by: epeter, psandoz, sviswanathan, jbhateja
1 parent e9a4341 commit d75ea7e

File tree

15 files changed

+274
-341
lines changed

15 files changed

+274
-341
lines changed

src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

Lines changed: 17 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1598,67 +1598,52 @@ void C2_MacroAssembler::vinsert(BasicType typ, XMMRegister dst, XMMRegister src,
15981598
}
15991599
}
16001600

1601-
void C2_MacroAssembler::vgather8b_masked_offset(BasicType elem_bt,
1602-
XMMRegister dst, Register base,
1603-
Register idx_base,
1604-
Register offset, Register mask,
1605-
Register mask_idx, Register rtmp,
1606-
int vlen_enc) {
1601+
void C2_MacroAssembler::vgather8b_masked(BasicType elem_bt, XMMRegister dst,
1602+
Register base, Register idx_base,
1603+
Register mask, Register mask_idx,
1604+
Register rtmp, int vlen_enc) {
16071605
vpxor(dst, dst, dst, vlen_enc);
16081606
if (elem_bt == T_SHORT) {
16091607
for (int i = 0; i < 4; i++) {
1610-
// dst[i] = mask[i] ? src[offset + idx_base[i]] : 0
1608+
// dst[i] = mask[i] ? src[idx_base[i]] : 0
16111609
Label skip_load;
16121610
btq(mask, mask_idx);
16131611
jccb(Assembler::carryClear, skip_load);
16141612
movl(rtmp, Address(idx_base, i * 4));
1615-
if (offset != noreg) {
1616-
addl(rtmp, offset);
1617-
}
16181613
pinsrw(dst, Address(base, rtmp, Address::times_2), i);
16191614
bind(skip_load);
16201615
incq(mask_idx);
16211616
}
16221617
} else {
16231618
assert(elem_bt == T_BYTE, "");
16241619
for (int i = 0; i < 8; i++) {
1625-
// dst[i] = mask[i] ? src[offset + idx_base[i]] : 0
1620+
// dst[i] = mask[i] ? src[idx_base[i]] : 0
16261621
Label skip_load;
16271622
btq(mask, mask_idx);
16281623
jccb(Assembler::carryClear, skip_load);
16291624
movl(rtmp, Address(idx_base, i * 4));
1630-
if (offset != noreg) {
1631-
addl(rtmp, offset);
1632-
}
16331625
pinsrb(dst, Address(base, rtmp), i);
16341626
bind(skip_load);
16351627
incq(mask_idx);
16361628
}
16371629
}
16381630
}
16391631

1640-
void C2_MacroAssembler::vgather8b_offset(BasicType elem_bt, XMMRegister dst,
1641-
Register base, Register idx_base,
1642-
Register offset, Register rtmp,
1643-
int vlen_enc) {
1632+
void C2_MacroAssembler::vgather8b(BasicType elem_bt, XMMRegister dst,
1633+
Register base, Register idx_base,
1634+
Register rtmp, int vlen_enc) {
16441635
vpxor(dst, dst, dst, vlen_enc);
16451636
if (elem_bt == T_SHORT) {
16461637
for (int i = 0; i < 4; i++) {
1647-
// dst[i] = src[offset + idx_base[i]]
1638+
// dst[i] = src[idx_base[i]]
16481639
movl(rtmp, Address(idx_base, i * 4));
1649-
if (offset != noreg) {
1650-
addl(rtmp, offset);
1651-
}
16521640
pinsrw(dst, Address(base, rtmp, Address::times_2), i);
16531641
}
16541642
} else {
16551643
assert(elem_bt == T_BYTE, "");
16561644
for (int i = 0; i < 8; i++) {
1657-
// dst[i] = src[offset + idx_base[i]]
1645+
// dst[i] = src[idx_base[i]]
16581646
movl(rtmp, Address(idx_base, i * 4));
1659-
if (offset != noreg) {
1660-
addl(rtmp, offset);
1661-
}
16621647
pinsrb(dst, Address(base, rtmp), i);
16631648
}
16641649
}
@@ -1687,11 +1672,10 @@ void C2_MacroAssembler::vgather8b_offset(BasicType elem_bt, XMMRegister dst,
16871672
*/
16881673
void C2_MacroAssembler::vgather_subword(BasicType elem_ty, XMMRegister dst,
16891674
Register base, Register idx_base,
1690-
Register offset, Register mask,
1691-
XMMRegister xtmp1, XMMRegister xtmp2,
1692-
XMMRegister temp_dst, Register rtmp,
1693-
Register mask_idx, Register length,
1694-
int vector_len, int vlen_enc) {
1675+
Register mask, XMMRegister xtmp1,
1676+
XMMRegister xtmp2, XMMRegister temp_dst,
1677+
Register rtmp, Register mask_idx,
1678+
Register length, int vector_len, int vlen_enc) {
16951679
Label GATHER8_LOOP;
16961680
assert(is_subword_type(elem_ty), "");
16971681
movl(length, vector_len);
@@ -1705,9 +1689,9 @@ void C2_MacroAssembler::vgather_subword(BasicType elem_ty, XMMRegister dst,
17051689
bind(GATHER8_LOOP);
17061690
// TMP_VEC_64(temp_dst) = PICK_SUB_WORDS_FROM_GATHER_INDICES
17071691
if (mask == noreg) {
1708-
vgather8b_offset(elem_ty, temp_dst, base, idx_base, offset, rtmp, vlen_enc);
1692+
vgather8b(elem_ty, temp_dst, base, idx_base, rtmp, vlen_enc);
17091693
} else {
1710-
vgather8b_masked_offset(elem_ty, temp_dst, base, idx_base, offset, mask, mask_idx, rtmp, vlen_enc);
1694+
vgather8b_masked(elem_ty, temp_dst, base, idx_base, mask, mask_idx, rtmp, vlen_enc);
17111695
}
17121696
// TEMP_PERM_VEC(temp_dst) = PERMUTE TMP_VEC_64(temp_dst) PERM_INDEX(xtmp1)
17131697
vpermd(temp_dst, xtmp1, temp_dst, vlen_enc == Assembler::AVX_512bit ? vlen_enc : Assembler::AVX_256bit);

src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -495,15 +495,14 @@
495495

496496
void efp16sh(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
497497

498-
void vgather_subword(BasicType elem_ty, XMMRegister dst, Register base, Register idx_base, Register offset,
499-
Register mask, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
498+
void vgather_subword(BasicType elem_ty, XMMRegister dst, Register base, Register idx_base, Register mask,
499+
XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp,
500500
Register midx, Register length, int vector_len, int vlen_enc);
501501

502-
void vgather8b_masked_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
503-
Register offset, Register mask, Register midx, Register rtmp, int vlen_enc);
504-
505-
void vgather8b_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
506-
Register offset, Register rtmp, int vlen_enc);
502+
void vgather8b_masked(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
503+
Register mask, Register midx, Register rtmp, int vlen_enc);
504+
void vgather8b(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base,
505+
Register rtmp, int vlen_enc);
507506

508507
void vector_saturating_op(int opc, BasicType elem_bt, XMMRegister dst, XMMRegister src1, XMMRegister src2, bool is_unsigned, int vlen_enc);
509508

0 commit comments

Comments
 (0)