Skip to content

Commit e1a1603

Browse files
authored
[X86][AVX10.2] Remove YMM rounding from VCVTTP.*QS (#132414)
Ref: https://cdrdv2.intel.com/v1/dl/getContent/784343
1 parent 5f94992 commit e1a1603

16 files changed: 160 additions and 821 deletions.

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4615,7 +4615,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
46154615
}
46164616

46174617
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4618-
def vcvttpd2dqs256_round_mask : X86Builtin<"_Vector<4, int>(_Vector<4, double>, _Vector<4, int>, unsigned char, _Constant int)">;
4618+
def vcvttpd2dqs256_mask : X86Builtin<"_Vector<4, int>(_Vector<4, double>, _Vector<4, int>, unsigned char)">;
46194619
}
46204620

46214621
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4627,7 +4627,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
46274627
}
46284628

46294629
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4630-
def vcvttpd2udqs256_round_mask : X86Builtin<"_Vector<4, int>(_Vector<4, double>, _Vector<4, int>, unsigned char, _Constant int)">;
4630+
def vcvttpd2udqs256_mask : X86Builtin<"_Vector<4, int>(_Vector<4, double>, _Vector<4, int>, unsigned char)">;
46314631
}
46324632

46334633
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4639,7 +4639,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
46394639
}
46404640

46414641
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4642-
def vcvttpd2qqs256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char, _Constant int)">;
4642+
def vcvttpd2qqs256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char)">;
46434643
}
46444644

46454645
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4651,7 +4651,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
46514651
}
46524652

46534653
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4654-
def vcvttpd2uqqs256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char, _Constant int)">;
4654+
def vcvttpd2uqqs256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, double>, _Vector<4, long long int>, unsigned char)">;
46554655
}
46564656

46574657
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4663,7 +4663,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
46634663
}
46644664

46654665
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4666-
def vcvttps2dqs256_round_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char, _Constant int)">;
4666+
def vcvttps2dqs256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char)">;
46674667
}
46684668

46694669
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4675,7 +4675,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
46754675
}
46764676

46774677
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4678-
def vcvttps2udqs256_round_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char, _Constant int)">;
4678+
def vcvttps2udqs256_mask : X86Builtin<"_Vector<8, int>(_Vector<8, float>, _Vector<8, int>, unsigned char)">;
46794679
}
46804680

46814681
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4687,7 +4687,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
46874687
}
46884688

46894689
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4690-
def vcvttps2qqs256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char, _Constant int)">;
4690+
def vcvttps2qqs256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char)">;
46914691
}
46924692

46934693
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
@@ -4699,7 +4699,7 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i
46994699
}
47004700

47014701
let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
4702-
def vcvttps2uqqs256_round_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char, _Constant int)">;
4702+
def vcvttps2uqqs256_mask : X86Builtin<"_Vector<4, long long int>(_Vector<4, float>, _Vector<4, long long int>, unsigned char)">;
47034703
}
47044704

47054705
let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in {

clang/lib/Headers/avx10_2satcvtdsintrin.h

Lines changed: 48 additions & 172 deletions
Large diffs are not rendered by default.

clang/lib/Sema/SemaX86.cpp

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -315,21 +315,13 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
315315
ArgNum = 4;
316316
HasRC = true;
317317
break;
318-
case X86::BI__builtin_ia32_vcvttpd2dqs256_round_mask:
319318
case X86::BI__builtin_ia32_vcvttpd2dqs512_round_mask:
320-
case X86::BI__builtin_ia32_vcvttpd2udqs256_round_mask:
321319
case X86::BI__builtin_ia32_vcvttpd2udqs512_round_mask:
322-
case X86::BI__builtin_ia32_vcvttpd2qqs256_round_mask:
323320
case X86::BI__builtin_ia32_vcvttpd2qqs512_round_mask:
324-
case X86::BI__builtin_ia32_vcvttpd2uqqs256_round_mask:
325321
case X86::BI__builtin_ia32_vcvttpd2uqqs512_round_mask:
326-
case X86::BI__builtin_ia32_vcvttps2dqs256_round_mask:
327322
case X86::BI__builtin_ia32_vcvttps2dqs512_round_mask:
328-
case X86::BI__builtin_ia32_vcvttps2udqs256_round_mask:
329323
case X86::BI__builtin_ia32_vcvttps2udqs512_round_mask:
330-
case X86::BI__builtin_ia32_vcvttps2qqs256_round_mask:
331324
case X86::BI__builtin_ia32_vcvttps2qqs512_round_mask:
332-
case X86::BI__builtin_ia32_vcvttps2uqqs256_round_mask:
333325
case X86::BI__builtin_ia32_vcvttps2uqqs512_round_mask:
334326
ArgNum = 3;
335327
break;

clang/test/CodeGen/X86/avx10_2satcvtds-builtins-errors.c

Lines changed: 0 additions & 57 deletions
This file was deleted.

clang/test/CodeGen/X86/avx10_2satcvtds-builtins-x64.c

Lines changed: 12 additions & 84 deletions
Original file line number | Diff line number | Diff line change
@@ -82,76 +82,40 @@ __m128i test_mm_maskz_cvtts_pd_epu64(__mmask8 U,__m128d A){
8282
// 256 bit
8383
__m256i test_mm256_cvtts_pd_epi64(__m256d A){
8484
// CHECK-LABEL: @test_mm256_cvtts_pd_epi64
85-
// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
85+
// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.256(<4 x double>
8686
return _mm256_cvtts_pd_epi64(A);
8787
}
8888

8989
__m256i test_mm256_mask_cvtts_pd_epi64(__m256i W,__mmask8 U, __m256d A){
9090
// CHECK-LABEL: @test_mm256_mask_cvtts_pd_epi64
91-
// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
91+
// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.256(<4 x double>
9292
return _mm256_mask_cvtts_pd_epi64(W,U, A);
9393
}
9494

9595
__m256i test_mm256_maskz_cvtts_pd_epi64(__mmask8 U, __m256d A){
9696
// CHECK-LABEL: @test_mm256_maskz_cvtts_pd_epi64
97-
// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
97+
// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.256(<4 x double>
9898
return _mm256_maskz_cvtts_pd_epi64(U, A);
9999
}
100100

101-
__m256i test_mm256_cvtts_roundpd_epi64(__m256d A){
102-
// CHECK-LABEL: @test_mm256_cvtts_roundpd_epi64
103-
// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
104-
return _mm256_cvtts_roundpd_epi64(A,_MM_FROUND_NEARBYINT );
105-
}
106-
107-
__m256i test_mm256_mask_cvtts_roundpd_epi64(__m256i W,__mmask8 U, __m256d A){
108-
// CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epi64
109-
// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
110-
return _mm256_mask_cvtts_roundpd_epi64(W,U,A,_MM_FROUND_NEARBYINT );
111-
}
112-
113-
__m256i test_mm256_maskz_cvtts_roundpd_epi64(__mmask8 U, __m256d A){
114-
// CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epi64
115-
// CHECK: @llvm.x86.avx10.mask.vcvttpd2qqs.round.256(<4 x double>
116-
return _mm256_maskz_cvtts_roundpd_epi64(U,A,_MM_FROUND_NEARBYINT );
117-
}
118-
119101
__m256i test_mm256_cvtts_pd_epu64(__m256d A){
120102
// CHECK-LABEL: @test_mm256_cvtts_pd_epu64
121-
// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
103+
// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.256(<4 x double>
122104
return _mm256_cvtts_pd_epu64(A);
123105
}
124106

125107
__m256i test_mm256_mask_cvtts_pd_epu64(__m256i W,__mmask8 U, __m256d A){
126108
// CHECK-LABEL: @test_mm256_mask_cvtts_pd_epu64
127-
// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
109+
// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.256(<4 x double>
128110
return _mm256_mask_cvtts_pd_epu64(W,U, A);
129111
}
130112

131113
__m256i test_mm256_maskz_cvtts_pd_epu64(__mmask8 U, __m256d A){
132114
// CHECK-LABEL: @test_mm256_maskz_cvtts_pd_epu64
133-
// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
115+
// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.256(<4 x double>
134116
return _mm256_maskz_cvtts_pd_epu64(U, A);
135117
}
136118

137-
__m256i test_mm256_cvtts_roundpd_epu64(__m256d A){
138-
// CHECK-LABEL: @test_mm256_cvtts_roundpd_epu64
139-
// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
140-
return _mm256_cvtts_roundpd_epu64(A,_MM_FROUND_NEARBYINT );
141-
}
142-
143-
__m256i test_mm256_mask_cvtts_roundpd_epu64(__m256i W,__mmask8 U, __m256d A){
144-
// CHECK-LABEL: @test_mm256_mask_cvtts_roundpd_epu64
145-
// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
146-
return _mm256_mask_cvtts_roundpd_epu64(W,U,A,_MM_FROUND_NEARBYINT );
147-
}
148-
149-
__m256i test_mm256_maskz_cvtts_roundpd_epu64(__mmask8 U, __m256d A){
150-
// CHECK-LABEL: @test_mm256_maskz_cvtts_roundpd_epu64
151-
// CHECK: @llvm.x86.avx10.mask.vcvttpd2uqqs.round.256(<4 x double>
152-
return _mm256_maskz_cvtts_roundpd_epu64(U,A,_MM_FROUND_NEARBYINT );
153-
}
154-
155119
// 128 bit
156120
__m128i test_mm_cvtts_ps_epi64(__m128 A){
157121
// CHECK-LABEL: @test_mm_cvtts_ps_epi64
@@ -191,72 +155,36 @@ __m128i test_mm_maskz_cvtts_ps_epu64(__mmask8 U,__m128 A){
191155

192156
__m256i test_mm256_cvtts_ps_epi64(__m128 A){
193157
// CHECK-LABEL: @test_mm256_cvtts_ps_epi64
194-
// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
158+
// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.256(<4 x float>
195159
return _mm256_cvtts_ps_epi64(A);
196160
}
197161

198162
__m256i test_mm256_mask_cvtts_ps_epi64(__m256i W,__mmask8 U, __m128 A){
199163
// CHECK-LABEL: @test_mm256_mask_cvtts_ps_epi64
200-
// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
164+
// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.256(<4 x float>
201165
return _mm256_mask_cvtts_ps_epi64(W,U, A);
202166
}
203167

204168
__m256i test_mm256_maskz_cvtts_ps_epi64(__mmask8 U, __m128 A){
205169
// CHECK-LABEL: @test_mm256_maskz_cvtts_ps_epi64
206-
// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
170+
// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.256(<4 x float>
207171
return _mm256_maskz_cvtts_ps_epi64(U, A);
208172
}
209173

210-
__m256i test_mm256_cvtts_roundps_epi64(__m128 A){
211-
// CHECK-LABEL: @test_mm256_cvtts_roundps_epi64
212-
// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
213-
return _mm256_cvtts_roundps_epi64(A, _MM_FROUND_NEARBYINT );
214-
}
215-
216-
__m256i test_mm256_mask_cvtts_roundps_epi64(__m256i W,__mmask8 U, __m128 A){
217-
// CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epi64
218-
// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
219-
return _mm256_mask_cvtts_roundps_epi64(W,U,A,_MM_FROUND_NEARBYINT );
220-
}
221-
222-
__m256i test_mm256_maskz_cvtts_roundps_epi64(__mmask8 U, __m128 A){
223-
// CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epi64
224-
// CHECK: @llvm.x86.avx10.mask.vcvttps2qqs.round.256(<4 x float>
225-
return _mm256_maskz_cvtts_roundps_epi64(U,A,_MM_FROUND_NEARBYINT );
226-
}
227-
228174
__m256i test_mm256_cvtts_ps_epu64(__m128 A){
229175
// CHECK-LABEL: @test_mm256_cvtts_ps_epu64
230-
// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
176+
// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.256(<4 x float>
231177
return _mm256_cvtts_ps_epu64(A);
232178
}
233179

234180
__m256i test_mm256_mask_cvtts_ps_epu64(__m256i W,__mmask8 U, __m128 A){
235181
// CHECK-LABEL: @test_mm256_mask_cvtts_ps_epu64
236-
// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
182+
// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.256(<4 x float>
237183
return _mm256_mask_cvtts_ps_epu64(W,U, A);
238184
}
239185

240186
__m256i test_mm256_maskz_cvtts_ps_epu64(__mmask8 U, __m128 A){
241187
// CHECK-LABEL: @test_mm256_maskz_cvtts_ps_epu64
242-
// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
188+
// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.256(<4 x float>
243189
return _mm256_maskz_cvtts_ps_epu64(U, A);
244190
}
245-
246-
__m256i test_mm256_cvtts_roundps_epu64(__m128 A){
247-
// CHECK-LABEL: @test_mm256_cvtts_roundps_epu64
248-
// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
249-
return _mm256_cvtts_roundps_epu64(A, _MM_FROUND_NEARBYINT );
250-
}
251-
252-
__m256i test_mm256_mask_cvtts_roundps_epu64(__m256i W,__mmask8 U, __m128 A){
253-
// CHECK-LABEL: @test_mm256_mask_cvtts_roundps_epu64
254-
// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
255-
return _mm256_mask_cvtts_roundps_epu64(W,U,A,_MM_FROUND_NEARBYINT );
256-
}
257-
258-
__m256i test_mm256_maskz_cvtts_roundps_epu64(__mmask8 U, __m128 A){
259-
// CHECK-LABEL: @test_mm256_maskz_cvtts_roundps_epu64
260-
// CHECK: @llvm.x86.avx10.mask.vcvttps2uqqs.round.256(<4 x float>
261-
return _mm256_maskz_cvtts_roundps_epu64(U,A,_MM_FROUND_NEARBYINT );
262-
}

0 commit comments

Comments
 (0)