@@ -735,6 +735,68 @@ SYCL_EXTERNAL void test_four_shorts(sycl::sub_group &sg, span<short, 4> v,
735
735
group_store (sg, v, p, opt_striped{});
736
736
}
737
737
738
+ // CHECK-LABEL: @_ZN7striped19test_sixteen_shortsERN4sycl3_V19sub_groupENS1_4spanIsLm16EEEPU3AS1s(
739
+ // CHECK-NEXT: entry:
740
+ // CHECK-NEXT: [[VALUES_I:%.*]] = alloca [16 x i16], align 2
741
+ // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA22]]
742
+ // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4)
743
+ // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne ptr addrspace(1) [[P:%.*]], null
744
+ // CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP_I_I]])
745
+ // CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(1) [[P]] to i64
746
+ // CHECK-NEXT: [[REM_I_I:%.*]] = and i64 [[TMP2]], 15
747
+ // CHECK-NEXT: [[CMP1_I_NOT_I:%.*]] = icmp eq i64 [[REM_I_I]], 0
748
+ // CHECK-NEXT: br i1 [[CMP1_I_NOT_I]], label [[IF_END_I:%.*]], label [[IF_THEN_I:%.*]]
749
+ // CHECK: if.then.i:
750
+ // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]]
751
+ // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupLocalInvocationId, align 4, !tbaa [[TBAA8]], !noalias [[META107:![0-9]+]]
752
+ // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(1) @__spirv_BuiltInSubgroupSize, align 4, !tbaa [[TBAA8]], !noalias [[META110:![0-9]+]]
753
+ // CHECK-NEXT: br label [[FOR_COND_I_I:%.*]]
754
+ // CHECK: for.cond.i.i:
755
+ // CHECK-NEXT: [[I_0_I_I:%.*]] = phi i32 [ 0, [[IF_THEN_I]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ]
756
+ // CHECK-NEXT: [[CMP_I19_I:%.*]] = icmp samesign ult i32 [[I_0_I_I]], 16
757
+ // CHECK-NEXT: br i1 [[CMP_I19_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEENSB_INS9_9NAIVE_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESR_NS0_4SPANISP_XT1_EEESQ_SS__EXIT_I:%.*]]
758
+ // CHECK: for.body.i.i:
759
+ // CHECK-NEXT: [[CONV_I_I:%.*]] = zext nneg i32 [[I_0_I_I]] to i64
760
+ // CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I_I]]
761
+ // CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I_I_I]], align 2, !tbaa [[TBAA27]]
762
+ // CHECK-NEXT: [[MUL_I_I_I:%.*]] = mul i32 [[TMP4]], [[I_0_I_I]]
763
+ // CHECK-NEXT: [[ADD_I_I_I:%.*]] = add i32 [[TMP3]], [[MUL_I_I_I]]
764
+ // CHECK-NEXT: [[IDXPROM_I_I:%.*]] = sext i32 [[ADD_I_I_I]] to i64
765
+ // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i16, ptr addrspace(1) [[P]], i64 [[IDXPROM_I_I]]
766
+ // CHECK-NEXT: store i16 [[TMP5]], ptr addrspace(1) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA27]]
767
+ // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i32 [[I_0_I_I]], 1
768
+ // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP113:![0-9]+]]
769
+ // CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm16EPU3AS1sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEENSB_INS9_9naive_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESR_NS0_4spanISP_XT1_EEESQ_SS_.exit.i:
770
+ // CHECK-NEXT: tail call spir_func void @_Z22__spirv_ControlBarrierjjj(i32 noundef 3, i32 noundef 3, i32 noundef 912) #[[ATTR5]]
771
+ // CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT:%.*]]
772
+ // CHECK: if.end.i:
773
+ // CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[VALUES_I]]) #[[ATTR7]]
774
+ // CHECK-NEXT: br label [[FOR_COND_I:%.*]]
775
+ // CHECK: for.cond.i:
776
+ // CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[IF_END_I]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ]
777
+ // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i32 [[I_0_I]], 16
778
+ // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[FOR_COND_CLEANUP_I:%.*]]
779
+ // CHECK: for.cond.cleanup.i:
780
+ // CHECK-NEXT: [[TMP6:%.*]] = load <16 x i16>, ptr [[VALUES_I]], align 2, !tbaa [[TBAA31]]
781
+ // CHECK-NEXT: tail call spir_func void @_Z31__spirv_SubgroupBlockWriteINTELIDv16_tEvPU3AS1tT_(ptr addrspace(1) noundef nonnull [[P]], <16 x i16> noundef [[TMP6]]) #[[ATTR5]]
782
+ // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VALUES_I]]) #[[ATTR7]]
783
+ // CHECK-NEXT: br label [[_ZN4SYCL3_V13EXT6ONEAPI12EXPERIMENTAL11GROUP_STOREINS0_9SUB_GROUPESLM16EPU3AS1SNS3_10PROPERTIESINS3_6DETAIL20PROPERTIES_TYPE_LISTIJNS3_14PROPERTY_VALUEINS3_18DATA_PLACEMENT_KEYEJST17INTEGRAL_CONSTANTIILI1EEEEENSB_INS3_21CONTIGUOUS_MEMORY_KEYEJEEENSB_INS3_14FULL_GROUP_KEYEJEEEEEEEEEENST9ENABLE_IFIXAAAASR6DETAILE18VERIFY_STORE_TYPESIT0_T2_ESR6DETAILE18IS_GENERIC_GROUP_VIT_E18IS_PROPERTY_LIST_VIT3_EEVE4TYPEESP_NS0_4SPANISN_XT1_EEESO_SQ__EXIT]]
784
+ // CHECK: for.body.i:
785
+ // CHECK-NEXT: [[CONV_I:%.*]] = zext nneg i32 [[I_0_I]] to i64
786
+ // CHECK-NEXT: [[ARRAYIDX_I20_I:%.*]] = getelementptr inbounds nuw i16, ptr addrspace(4) [[TMP1]], i64 [[CONV_I]]
787
+ // CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[ARRAYIDX_I20_I]], align 2, !tbaa [[TBAA27]]
788
+ // CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [16 x i16], ptr [[VALUES_I]], i64 0, i64 [[CONV_I]]
789
+ // CHECK-NEXT: store i16 [[TMP7]], ptr [[ARRAYIDX_I]], align 2, !tbaa [[TBAA27]]
790
+ // CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
791
+ // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP114:![0-9]+]]
792
+ // CHECK: _ZN4sycl3_V13ext6oneapi12experimental11group_storeINS0_9sub_groupEsLm16EPU3AS1sNS3_10propertiesINS3_6detail20properties_type_listIJNS3_14property_valueINS3_18data_placement_keyEJSt17integral_constantIiLi1EEEEENSB_INS3_21contiguous_memory_keyEJEEENSB_INS3_14full_group_keyEJEEEEEEEEEENSt9enable_ifIXaaaasr6detailE18verify_store_typesIT0_T2_Esr6detailE18is_generic_group_vIT_E18is_property_list_vIT3_EEvE4typeESP_NS0_4spanISN_XT1_EEESO_SQ_.exit:
793
+ // CHECK-NEXT: ret void
794
+ //
795
+ SYCL_EXTERNAL void test_sixteen_shorts (sycl::sub_group &sg, span<short , 16 > v,
796
+ plain_global_ptr<short > p) {
797
+ group_store (sg, v, p, opt_striped{});
798
+ }
799
+
738
800
// CHECK-LABEL: @_ZN7striped21test_non_power_of_twoERN4sycl3_V19sub_groupENS1_4spanIiLm3EEEPU3AS1i(
739
801
// CHECK-NEXT: entry:
740
802
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[V:%.*]], align 8, !tbaa [[TBAA15]]
0 commit comments