@@ -822,7 +822,46 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
822
822
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32:
823
823
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64:
824
824
case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32:
825
- case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: {
825
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64:
826
+ // GFX1250 WMMA builtins
827
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
828
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
829
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
830
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
831
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
832
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
833
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
834
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
835
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
836
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
837
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
838
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
839
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
840
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
841
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
842
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
843
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
844
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
845
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
846
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
847
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
848
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
849
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
850
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
851
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
852
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
853
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
854
+ case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
855
+ case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
856
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
857
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
858
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
859
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
860
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
861
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
862
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
863
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
864
+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8: {
826
865
827
866
// These operations perform a matrix multiplication and accumulation of
828
867
// the form:
@@ -837,6 +876,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
837
876
// "false".
838
877
bool AppendFalseForOpselArg = false ;
839
878
unsigned BuiltinWMMAOp;
879
+ // Need return type when D and C are of different types.
880
+ bool NeedReturnType = false ;
840
881
841
882
switch (BuiltinID) {
842
883
case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32:
@@ -975,6 +1016,160 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
975
1016
ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 }; // CD, A, B, Index
976
1017
BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8;
977
1018
break ;
1019
+ // GFX1250 WMMA builtins
1020
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x4_f32:
1021
+ ArgsForMatchingMatrixTypes = {5 , 1 };
1022
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x4_f32;
1023
+ break ;
1024
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_bf16:
1025
+ ArgsForMatchingMatrixTypes = {5 , 1 };
1026
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_bf16;
1027
+ break ;
1028
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x32_f16:
1029
+ ArgsForMatchingMatrixTypes = {5 , 1 };
1030
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x32_f16;
1031
+ break ;
1032
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x32_f16:
1033
+ ArgsForMatchingMatrixTypes = {5 , 1 };
1034
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x32_f16;
1035
+ break ;
1036
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x32_bf16:
1037
+ ArgsForMatchingMatrixTypes = {5 , 1 };
1038
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16;
1039
+ break ;
1040
+ case AMDGPU::BI__builtin_amdgcn_wmma_bf16f32_16x16x32_bf16:
1041
+ NeedReturnType = true ;
1042
+ ArgsForMatchingMatrixTypes = {1 , 5 };
1043
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16;
1044
+ break ;
1045
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_fp8:
1046
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1047
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_fp8;
1048
+ break ;
1049
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_fp8_bf8:
1050
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1051
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_fp8_bf8;
1052
+ break ;
1053
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_fp8:
1054
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1055
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_fp8;
1056
+ break ;
1057
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x64_bf8_bf8:
1058
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1059
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x64_bf8_bf8;
1060
+ break ;
1061
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_fp8:
1062
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1063
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_fp8;
1064
+ break ;
1065
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_fp8_bf8:
1066
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1067
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_fp8_bf8;
1068
+ break ;
1069
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_fp8:
1070
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1071
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_fp8;
1072
+ break ;
1073
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x64_bf8_bf8:
1074
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1075
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x64_bf8_bf8;
1076
+ break ;
1077
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_fp8:
1078
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1079
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_fp8;
1080
+ break ;
1081
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_fp8_bf8:
1082
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1083
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_fp8_bf8;
1084
+ break ;
1085
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_fp8:
1086
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1087
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_fp8;
1088
+ break ;
1089
+ case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x128_bf8_bf8:
1090
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1091
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x128_bf8_bf8;
1092
+ break ;
1093
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_fp8:
1094
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1095
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_fp8;
1096
+ break ;
1097
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_fp8_bf8:
1098
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1099
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_fp8_bf8;
1100
+ break ;
1101
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_fp8:
1102
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1103
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_fp8;
1104
+ break ;
1105
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x128_bf8_bf8:
1106
+ ArgsForMatchingMatrixTypes = {3 , 0 };
1107
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x128_bf8_bf8;
1108
+ break ;
1109
+ case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x64_iu8:
1110
+ ArgsForMatchingMatrixTypes = {4 , 1 };
1111
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x64_iu8;
1112
+ break ;
1113
+ case AMDGPU::BI__builtin_amdgcn_wmma_f32_32x16x128_f4:
1114
+ ArgsForMatchingMatrixTypes = {3 , 0 , 1 };
1115
+ BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_32x16x128_f4;
1116
+ break ;
1117
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_f16:
1118
+ ArgsForMatchingMatrixTypes = {4 , 1 , 3 , 5 };
1119
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_f16;
1120
+ break ;
1121
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x64_bf16:
1122
+ ArgsForMatchingMatrixTypes = {4 , 1 , 3 , 5 };
1123
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x64_bf16;
1124
+ break ;
1125
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x64_f16:
1126
+ ArgsForMatchingMatrixTypes = {4 , 1 , 3 , 5 };
1127
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x64_f16;
1128
+ break ;
1129
+ case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x64_bf16:
1130
+ ArgsForMatchingMatrixTypes = {4 , 1 , 3 , 5 };
1131
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x64_bf16;
1132
+ break ;
1133
+ case AMDGPU::BI__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16:
1134
+ ArgsForMatchingMatrixTypes = {4 , 1 , 3 , 5 };
1135
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16f32_16x16x64_bf16;
1136
+ break ;
1137
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8:
1138
+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1139
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_fp8;
1140
+ break ;
1141
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8:
1142
+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1143
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_fp8_bf8;
1144
+ break ;
1145
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8:
1146
+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1147
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_fp8;
1148
+ break ;
1149
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8:
1150
+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1151
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x128_bf8_bf8;
1152
+ break ;
1153
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8:
1154
+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1155
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_fp8;
1156
+ break ;
1157
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8:
1158
+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1159
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_fp8_bf8;
1160
+ break ;
1161
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8:
1162
+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1163
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_fp8;
1164
+ break ;
1165
+ case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8:
1166
+ ArgsForMatchingMatrixTypes = {2 , 0 , 1 , 3 };
1167
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x128_bf8_bf8;
1168
+ break ;
1169
+ case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x128_iu8:
1170
+ ArgsForMatchingMatrixTypes = {4 , 1 , 3 , 5 };
1171
+ BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8;
1172
+ break ;
978
1173
}
979
1174
980
1175
SmallVector<Value *, 6 > Args;
@@ -984,6 +1179,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
984
1179
Args.push_back (Builder.getFalse ());
985
1180
986
1181
SmallVector<llvm::Type *, 6 > ArgTypes;
1182
+ if (NeedReturnType)
1183
+ ArgTypes.push_back (ConvertType (E->getType ()));
987
1184
for (auto ArgIdx : ArgsForMatchingMatrixTypes)
988
1185
ArgTypes.push_back (Args[ArgIdx]->getType ());
989
1186
0 commit comments