@@ -35,14 +35,100 @@ int main() {
35
35
matrix_type::sint32, matrix_type::sint32},
36
36
{8 , 0 , 0 , 0 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
37
37
matrix_type::fp32, matrix_type::fp32},
38
+ {8 , 0 , 0 , 0 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
39
+ matrix_type::fp16, matrix_type::fp32},
40
+ {8 , 0 , 0 , 0 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
41
+ matrix_type::fp32, matrix_type::fp16},
42
+ {8 , 0 , 0 , 0 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
43
+ matrix_type::fp16, matrix_type::fp16},
44
+ {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
45
+ matrix_type::fp32, matrix_type::fp32},
46
+ {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
47
+ matrix_type::fp32, matrix_type::fp16},
48
+ {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
49
+ matrix_type::fp16, matrix_type::fp32},
50
+ {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::fp16, matrix_type::fp16,
51
+ matrix_type::fp16, matrix_type::fp16},
52
+ {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
53
+ matrix_type::fp32, matrix_type::fp32},
54
+ {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
55
+ matrix_type::fp16, matrix_type::fp32},
56
+ {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
57
+ matrix_type::fp32, matrix_type::fp16},
58
+ {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
59
+ matrix_type::fp16, matrix_type::fp16},
60
+ {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
61
+ matrix_type::fp32, matrix_type::fp32},
62
+ {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
63
+ matrix_type::fp16, matrix_type::fp32},
64
+ {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
65
+ matrix_type::fp32, matrix_type::fp16},
66
+ {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::fp16, matrix_type::fp16,
67
+ matrix_type::fp16, matrix_type::fp16},
68
+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
69
+ matrix_type::fp32, matrix_type::fp32},
70
+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
71
+ matrix_type::fp16, matrix_type::fp32},
72
+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
73
+ matrix_type::fp32, matrix_type::fp16},
74
+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
75
+ matrix_type::fp16, matrix_type::fp16},
76
+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
77
+ matrix_type::fp32, matrix_type::fp32},
78
+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
79
+ matrix_type::fp16, matrix_type::fp32},
80
+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
81
+ matrix_type::fp32, matrix_type::fp16},
82
+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::fp16, matrix_type::fp16,
83
+ matrix_type::fp16, matrix_type::fp16},
84
+ {8 , 0 , 0 , 0 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
85
+ matrix_type::bf16 , matrix_type::bf16 },
86
+ {8 , 0 , 0 , 0 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
87
+ matrix_type::fp32, matrix_type::bf16 },
88
+ {8 , 0 , 0 , 0 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
89
+ matrix_type::bf16 , matrix_type::fp32},
38
90
{8 , 0 , 0 , 0 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
39
91
matrix_type::fp32, matrix_type::fp32},
40
92
{0 , 0 , 0 , 16 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
41
93
matrix_type::fp32, matrix_type::fp32},
94
+ {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
95
+ matrix_type::bf16 , matrix_type::fp32},
96
+ {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
97
+ matrix_type::fp32, matrix_type::bf16 },
98
+ {0 , 0 , 0 , 16 , 16 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
99
+ matrix_type::bf16 , matrix_type::bf16 },
42
100
{0 , 0 , 0 , 1 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
43
101
matrix_type::fp32, matrix_type::fp32},
102
+ {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
103
+ matrix_type::bf16 , matrix_type::fp32},
104
+ {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
105
+ matrix_type::fp32, matrix_type::bf16 },
106
+ {0 , 0 , 0 , 1 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
107
+ matrix_type::bf16 , matrix_type::bf16 },
44
108
{0 , 0 , 0 , 32 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
45
109
matrix_type::fp32, matrix_type::fp32},
110
+ {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
111
+ matrix_type::bf16 , matrix_type::fp32},
112
+ {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
113
+ matrix_type::fp32, matrix_type::bf16 },
114
+ {0 , 0 , 0 , 32 , 64 , 16 , matrix_type::bf16 , matrix_type::bf16 ,
115
+ matrix_type::bf16 , matrix_type::bf16 },
116
+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
117
+ matrix_type::fp32, matrix_type::fp32},
118
+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
119
+ matrix_type::bf16 , matrix_type::fp32},
120
+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
121
+ matrix_type::fp32, matrix_type::bf16 },
122
+ {0 , 0 , 0 , 1 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
123
+ matrix_type::bf16 , matrix_type::bf16 },
124
+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
125
+ matrix_type::fp32, matrix_type::fp32},
126
+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
127
+ matrix_type::bf16 , matrix_type::fp32},
128
+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
129
+ matrix_type::fp32, matrix_type::bf16 },
130
+ {0 , 0 , 0 , 32 , 64 , 32 , matrix_type::bf16 , matrix_type::bf16 ,
131
+ matrix_type::bf16 , matrix_type::bf16 },
46
132
{8 , 0 , 0 , 0 , 16 , 8 , matrix_type::tf32, matrix_type::tf32,
47
133
matrix_type::fp32, matrix_type::fp32},
48
134
};
0 commit comments