1
- // RUN: %clang_cc1 -no-opaque-pointers -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s
2
- // RUN: %clang_cc1 -no-opaque-pointers -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
1
+ // RUN: %clang_cc1 -no-opaque-pointers -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
2
+ // RUN: %clang_cc1 -no-opaque-pointers -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X64
3
+ // RUN: %clang_cc1 -no-opaque-pointers -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
4
+ // RUN: %clang_cc1 -no-opaque-pointers -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=CHECK,X86
3
5
4
6
5
7
#include <immintrin.h>
@@ -466,8 +468,11 @@ __m128i test_mm_mask_i32gather_epi64(__m128i a, long long const *b, __m128i c, _
466
468
}
467
469
468
470
// Codegen test for the unmasked _mm256_i32gather_epi64(b, c, 2) call.
// Every pattern expects a call to @llvm.x86.avx2.gather.d.q.256 with an i8
// scale argument of 2. The X64 patterns (x86_64 RUN lines) pin the first
// operand to zeroinitializer; the X86 patterns (i386 RUN lines) accept any
// SSA value in that position.
__m256i test_mm256_i32gather_epi64 (long long const * b , __m128i c ) {
469
- // CHECK-LABEL: test_mm256_i32gather_epi64
470
- // CHECK: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
471
+ // X64-LABEL: test_mm256_i32gather_epi64
472
+ // X64: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
473
+ //
474
+ // X86-LABEL: test_mm256_i32gather_epi64
475
+ // X86: call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %{{.*}}, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
471
476
return _mm256_i32gather_epi64 (b , c , 2 );
472
477
}
473
478
@@ -478,11 +483,17 @@ __m256i test_mm256_mask_i32gather_epi64(__m256i a, long long const *b, __m128i c
478
483
}
479
484
480
485
// Codegen test for the unmasked _mm_i32gather_pd(b, c, 2) call.
// The patterns expect an fcmp oeq on <2 x double>, immediately followed by a
// sext to <2 x i64> and a bitcast back to <2 x double>, and later a call to
// @llvm.x86.avx2.gather.d.pd with an i8 scale argument of 2. The X64 patterns
// pin the first call operand to zeroinitializer; the X86 patterns accept any
// SSA value there.
__m128d test_mm_i32gather_pd (double const * b , __m128i c ) {
481
- // CHECK-LABEL: test_mm_i32gather_pd
482
- // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
483
- // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
484
- // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
485
- // CHECK: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
486
+ // X64-LABEL: test_mm_i32gather_pd
487
+ // X64: [[CMP:%.*]] = fcmp oeq <2 x double>
488
+ // X64-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
489
+ // X64-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
490
+ // X64: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
491
+ //
492
+ // X86-LABEL: test_mm_i32gather_pd
493
+ // X86: [[CMP:%.*]] = fcmp oeq <2 x double>
494
+ // X86-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
495
+ // X86-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
496
+ // X86: call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %{{.*}}, i8* %{{.*}}, <4 x i32> %{{.*}}, <2 x double> %{{.*}}, i8 2)
486
497
return _mm_i32gather_pd (b , c , 2 );
487
498
}
488
499
@@ -493,11 +504,17 @@ __m128d test_mm_mask_i32gather_pd(__m128d a, double const *b, __m128i c, __m128d
493
504
}
494
505
495
506
// Codegen test for the unmasked _mm256_i32gather_pd(b, c, 2) call.
// The patterns expect an fcmp oeq on <4 x double>, immediately followed by a
// sext to <4 x i64> and a bitcast back to <4 x double>, and later a call to
// @llvm.x86.avx2.gather.d.pd.256 with an i8 scale argument of 2. The X64
// patterns pin the first call operand to zeroinitializer; the X86 patterns
// accept any SSA value there.
__m256d test_mm256_i32gather_pd (double const * b , __m128i c ) {
496
- // CHECK-LABEL: test_mm256_i32gather_pd
497
- // CHECK: [[CMP:%.*]] = fcmp oeq <4 x double>
498
- // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
499
- // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
500
- // CHECK: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
507
+ // X64-LABEL: test_mm256_i32gather_pd
508
+ // X64: [[CMP:%.*]] = fcmp oeq <4 x double>
509
+ // X64-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
510
+ // X64-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
511
+ // X64: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
512
+ //
513
+ // X86-LABEL: test_mm256_i32gather_pd
514
+ // X86: [[CMP:%.*]] = fcmp oeq <4 x double>
515
+ // X86-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64>
516
+ // X86-NEXT: [[BC:%.*]] = bitcast <4 x i64> [[SEXT]] to <4 x double>
517
+ // X86: call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %{{.*}}, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x double> %{{.*}}, i8 2)
501
518
return _mm256_i32gather_pd (b , c , 2 );
502
519
}
503
520
@@ -508,11 +525,17 @@ __m256d test_mm256_mask_i32gather_pd(__m256d a, double const *b, __m128i c, __m2
508
525
}
509
526
510
527
// Codegen test for the unmasked _mm_i32gather_ps(b, c, 2) call.
// The patterns expect an fcmp oeq on <4 x float>, immediately followed by a
// sext to <4 x i32> and a bitcast back to <4 x float>, and later a call to
// @llvm.x86.avx2.gather.d.ps with an i8 scale argument of 2. The X64 patterns
// pin the first call operand to zeroinitializer; the X86 patterns accept any
// SSA value there.
__m128 test_mm_i32gather_ps (float const * b , __m128i c ) {
511
- // CHECK-LABEL: test_mm_i32gather_ps
512
- // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
513
- // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
514
- // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
515
- // CHECK: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
528
+ // X64-LABEL: test_mm_i32gather_ps
529
+ // X64: [[CMP:%.*]] = fcmp oeq <4 x float>
530
+ // X64-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
531
+ // X64-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
532
+ // X64: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> zeroinitializer, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
533
+ //
534
+ // X86-LABEL: test_mm_i32gather_ps
535
+ // X86: [[CMP:%.*]] = fcmp oeq <4 x float>
536
+ // X86-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
537
+ // X86-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
538
+ // X86: call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %{{.*}}, i8* %{{.*}}, <4 x i32> %{{.*}}, <4 x float> %{{.*}}, i8 2)
516
539
return _mm_i32gather_ps (b , c , 2 );
517
540
}
518
541
@@ -523,11 +546,17 @@ __m128 test_mm_mask_i32gather_ps(__m128 a, float const *b, __m128i c, __m128 d)
523
546
}
524
547
525
548
// Codegen test for the unmasked _mm256_i32gather_ps(b, c, 2) call.
// The patterns expect an fcmp oeq on <8 x float>, immediately followed by a
// sext to <8 x i32> and a bitcast back to <8 x float>, and later a call to
// @llvm.x86.avx2.gather.d.ps.256 with an i8 scale argument of 2. The X64
// patterns pin the first call operand to zeroinitializer; the X86 patterns
// accept any SSA value there.
__m256 test_mm256_i32gather_ps (float const * b , __m256i c ) {
526
- // CHECK-LABEL: test_mm256_i32gather_ps
527
- // CHECK: [[CMP:%.*]] = fcmp oeq <8 x float>
528
- // CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
529
- // CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <8 x float>
530
- // CHECK: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
549
+ // X64-LABEL: test_mm256_i32gather_ps
550
+ // X64: [[CMP:%.*]] = fcmp oeq <8 x float>
551
+ // X64-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
552
+ // X64-NEXT: [[BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <8 x float>
553
+ // X64: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> zeroinitializer, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
554
+ //
555
+ // X86-LABEL: test_mm256_i32gather_ps
556
+ // X86: [[CMP:%.*]] = fcmp oeq <8 x float>
557
+ // X86-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32>
558
+ // X86-NEXT: [[BC:%.*]] = bitcast <8 x i32> [[SEXT]] to <8 x float>
559
+ // X86: call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %{{.*}}, i8* %{{.*}}, <8 x i32> %{{.*}}, <8 x float> %{{.*}}, i8 2)
531
560
return _mm256_i32gather_ps (b , c , 2 );
532
561
}
533
562
@@ -574,8 +603,11 @@ __m128i test_mm_mask_i64gather_epi64(__m128i a, long long const *b, __m128i c, _
574
603
}
575
604
576
605
// Codegen test for the unmasked _mm256_i64gather_epi64(b, c, 2) call.
// Every pattern expects a call to @llvm.x86.avx2.gather.q.q.256 with an i8
// scale argument of 2. The X64 patterns (x86_64 RUN lines) pin the first
// operand to zeroinitializer; the X86 patterns (i386 RUN lines) accept any
// SSA value in that position.
__m256i test_mm256_i64gather_epi64 (long long const * b , __m256i c ) {
577
- // CHECK-LABEL: test_mm256_i64gather_epi64
578
- // CHECK: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
606
+ // X64-LABEL: test_mm256_i64gather_epi64
607
+ // X64: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
608
+ //
609
+ // X86-LABEL: test_mm256_i64gather_epi64
610
+ // X86: call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %{{.*}}, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 2)
579
611
return _mm256_i64gather_epi64 (b , c , 2 );
580
612
}
581
613
@@ -586,11 +618,17 @@ __m256i test_mm256_mask_i64gather_epi64(__m256i a, long long const *b, __m256i c
586
618
}
587
619
588
620
// Codegen test for the unmasked _mm_i64gather_pd(b, c, 2) call.
// The patterns expect an fcmp oeq on <2 x double>, immediately followed by a
// sext to <2 x i64> and a bitcast back to <2 x double>, and later a call to
// @llvm.x86.avx2.gather.q.pd with an i8 scale argument of 2. The X64 patterns
// pin the first call operand to zeroinitializer; the X86 patterns accept any
// SSA value there.
__m128d test_mm_i64gather_pd (double const * b , __m128i c ) {
589
- // CHECK-LABEL: test_mm_i64gather_pd
590
- // CHECK: [[CMP:%.*]] = fcmp oeq <2 x double>
591
- // CHECK-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
592
- // CHECK-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
593
- // CHECK: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
621
+ // X64-LABEL: test_mm_i64gather_pd
622
+ // X64: [[CMP:%.*]] = fcmp oeq <2 x double>
623
+ // X64-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
624
+ // X64-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
625
+ // X64: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
626
+ //
627
+ // X86-LABEL: test_mm_i64gather_pd
628
+ // X86: [[CMP:%.*]] = fcmp oeq <2 x double>
629
+ // X86-NEXT: [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
630
+ // X86-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
631
+ // X86: call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %{{.*}}, i8* %{{.*}}, <2 x i64> %{{.*}}, <2 x double> %{{.*}}, i8 2)
594
632
return _mm_i64gather_pd (b , c , 2 );
595
633
}
596
634
@@ -601,9 +639,13 @@ __m128d test_mm_mask_i64gather_pd(__m128d a, double const *b, __m128i c, __m128d
601
639
}
602
640
603
641
// Codegen test for the unmasked _mm256_i64gather_pd(b, c, 2) call.
// The patterns expect an fcmp oeq on <4 x double> and a later call to
// @llvm.x86.avx2.gather.q.pd.256 with an i8 scale argument of 2; unlike the
// sibling pd/ps tests, no sext/bitcast sequence is matched here. The X64
// patterns pin the first call operand to zeroinitializer; the X86 patterns
// accept any SSA value there.
__m256d test_mm256_i64gather_pd (double const * b , __m256i c ) {
604
- // CHECK-LABEL: test_mm256_i64gather_pd
605
- // CHECK: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
606
- // CHECK: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
642
+ // X64-LABEL: test_mm256_i64gather_pd
643
+ // X64: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
644
+ // X64: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
645
+ //
646
+ // X86-LABEL: test_mm256_i64gather_pd
647
+ // X86: fcmp oeq <4 x double> %{{.*}}, %{{.*}}
648
+ // X86: call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %{{.*}}, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x double> %{{.*}}, i8 2)
607
649
return _mm256_i64gather_pd (b , c , 2 );
608
650
}
609
651
@@ -614,11 +656,17 @@ __m256d test_mm256_mask_i64gather_pd(__m256d a, double const *b, __m256i c, __m2
614
656
}
615
657
616
658
// Codegen test for the unmasked _mm_i64gather_ps(b, c, 2) call.
// The patterns expect an fcmp oeq on <4 x float>, immediately followed by a
// sext to <4 x i32> and a bitcast back to <4 x float>, and later a call to
// @llvm.x86.avx2.gather.q.ps (index operand typed <2 x i64>) with an i8 scale
// argument of 2. The X64 patterns pin the first call operand to
// zeroinitializer; the X86 patterns accept any SSA value there.
__m128 test_mm_i64gather_ps (float const * b , __m128i c ) {
617
- // CHECK-LABEL: test_mm_i64gather_ps
618
- // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
619
- // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
620
- // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
621
- // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
659
+ // X64-LABEL: test_mm_i64gather_ps
660
+ // X64: [[CMP:%.*]] = fcmp oeq <4 x float>
661
+ // X64-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
662
+ // X64-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
663
+ // X64: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> zeroinitializer, i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
664
+ //
665
+ // X86-LABEL: test_mm_i64gather_ps
666
+ // X86: [[CMP:%.*]] = fcmp oeq <4 x float>
667
+ // X86-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
668
+ // X86-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
669
+ // X86: call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %{{.*}}, i8* %{{.*}}, <2 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
622
670
return _mm_i64gather_ps (b , c , 2 );
623
671
}
624
672
@@ -629,11 +677,17 @@ __m128 test_mm_mask_i64gather_ps(__m128 a, float const *b, __m128i c, __m128 d)
629
677
}
630
678
631
679
// Codegen test for the unmasked _mm256_i64gather_ps(b, c, 2) call, which
// takes a __m256i index and returns a __m128 result.
// The patterns expect an fcmp oeq on <4 x float>, immediately followed by a
// sext to <4 x i32> and a bitcast back to <4 x float>, and later a call to
// @llvm.x86.avx2.gather.q.ps.256 (index operand typed <4 x i64>) with an i8
// scale argument of 2. The X64 patterns pin the first call operand to
// zeroinitializer; the X86 patterns accept any SSA value there.
__m128 test_mm256_i64gather_ps (float const * b , __m256i c ) {
632
- // CHECK-LABEL: test_mm256_i64gather_ps
633
- // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
634
- // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
635
- // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
636
- // CHECK: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
680
+ // X64-LABEL: test_mm256_i64gather_ps
681
+ // X64: [[CMP:%.*]] = fcmp oeq <4 x float>
682
+ // X64-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
683
+ // X64-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
684
+ // X64: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> zeroinitializer, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
685
+ //
686
+ // X86-LABEL: test_mm256_i64gather_ps
687
+ // X86: [[CMP:%.*]] = fcmp oeq <4 x float>
688
+ // X86-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
689
+ // X86-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
690
+ // X86: call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %{{.*}}, i8* %{{.*}}, <4 x i64> %{{.*}}, <4 x float> %{{.*}}, i8 2)
637
691
return _mm256_i64gather_ps (b , c , 2 );
638
692
}
639
693
0 commit comments