@@ -80,7 +80,7 @@ static void BF16GEMV_N_VSX_1(BLASLONG n, IFLOAT **ap, IFLOAT *xo, FLOAT *y, FLOA
     } else if (n) {
       vy0[0] = vec_loadN_f32(&v_y[(i * 2) + 0], n);
 
-      vy0[0] += vec_loadNHi_mult2(v_x0, &va0[i], n, zero);
+      vy0[0] += vec_loadNHi_mult(&va0[i], v_x0, n, zero);
 
       vec_storeN_f32(vy0[0], &v_y[(i * 2) + 0], n);
     }
@@ -131,8 +131,8 @@ static void BF16GEMV_N_VSX_2(BLASLONG n, IFLOAT **ap, IFLOAT *xo, FLOAT *y, FLOA
     } else if (n) {
       vy0[0] = vec_loadN_f32(&v_y[(i * 2) + 0], n);
 
-      vy0[0] += vec_loadNHi_mult2(v_x0, &va0[i], n, zero);
-      vy0[0] += vec_loadNHi_mult2(v_x1, &va1[i], n, zero);
+      vy0[0] += vec_loadNHi_mult(&va0[i], v_x0, n, zero);
+      vy0[0] += vec_loadNHi_mult(&va1[i], v_x1, n, zero);
 
       vec_storeN_f32(vy0[0], &v_y[(i * 2) + 0], n);
     }
@@ -193,10 +193,10 @@ static void BF16GEMV_N_VSX_4(BLASLONG n, IFLOAT **ap, IFLOAT *xo, FLOAT *y, FLOA
     } else if (n) {
      vy0[0] = vec_loadN_f32(&v_y[(i * 2) + 0], n);
 
-      vy0[0] += vec_loadNHi_mult2(v_x0, &va0[i], n, zero);
-      vy0[0] += vec_loadNHi_mult2(v_x1, &va1[i], n, zero);
-      vy0[0] += vec_loadNHi_mult2(v_x2, &va2[i], n, zero);
-      vy0[0] += vec_loadNHi_mult2(v_x3, &va3[i], n, zero);
+      vy0[0] += vec_loadNHi_mult(&va0[i], v_x0, n, zero);
+      vy0[0] += vec_loadNHi_mult(&va1[i], v_x1, n, zero);
+      vy0[0] += vec_loadNHi_mult(&va2[i], v_x2, n, zero);
+      vy0[0] += vec_loadNHi_mult(&va3[i], v_x3, n, zero);
 
       vec_storeN_f32(vy0[0], &v_y[(i * 2) + 0], n);
     }
@@ -281,14 +281,14 @@ static void BF16GEMV_N_VSX_8(BLASLONG n, IFLOAT **ap, IFLOAT *xo, FLOAT *y, BLAS
     } else if (n) {
       vy0[0] = vec_loadN_f32(&v_y[(i * 2) + 0], n);
 
-      vy0[0] += vec_loadNHi_mult2(v_x0, &va0[i], n, zero);
-      vy0[0] += vec_loadNHi_mult2(v_x1, &va1[i], n, zero);
-      vy0[0] += vec_loadNHi_mult2(v_x2, &va2[i], n, zero);
-      vy0[0] += vec_loadNHi_mult2(v_x3, &va3[i], n, zero);
-      vy0[0] += vec_loadNHi_mult2(v_x4, &vb0[i], n, zero);
-      vy0[0] += vec_loadNHi_mult2(v_x5, &vb1[i], n, zero);
-      vy0[0] += vec_loadNHi_mult2(v_x6, &vb2[i], n, zero);
-      vy0[0] += vec_loadNHi_mult2(v_x7, &vb3[i], n, zero);
+      vy0[0] += vec_loadNHi_mult(&va0[i], v_x0, n, zero);
+      vy0[0] += vec_loadNHi_mult(&va1[i], v_x1, n, zero);
+      vy0[0] += vec_loadNHi_mult(&va2[i], v_x2, n, zero);
+      vy0[0] += vec_loadNHi_mult(&va3[i], v_x3, n, zero);
+      vy0[0] += vec_loadNHi_mult(&vb0[i], v_x4, n, zero);
+      vy0[0] += vec_loadNHi_mult(&vb1[i], v_x5, n, zero);
+      vy0[0] += vec_loadNHi_mult(&vb2[i], v_x6, n, zero);
+      vy0[0] += vec_loadNHi_mult(&vb3[i], v_x7, n, zero);
 
       vec_storeN_f32(vy0[0], &v_y[(i * 2) + 0], n);
     }
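Each hunk replaces vec_loadNHi_mult2(v_x, &va[i], n, zero) with vec_loadNHi_mult(&va[i], v_x, n, zero) in the tail path. As context, the plain-C sketch below gives an assumed reference model for what the new call computes on an n-element tail: widen up to n bfloat16 matrix elements to float (bfloat16 occupies the high 16 bits of a binary32, which is what the zero fill supplies) and multiply them by the broadcast x value. This is an illustration only, not the OpenBLAS VSX implementation, and the helper names in the sketch are hypothetical.

/*
 * Hypothetical scalar reference for the assumed semantics of
 * vec_loadNHi_mult(ap, x, n, zero) in the tail path above.
 */
#include <stdint.h>
#include <string.h>

static float bf16_to_f32_ref(uint16_t b)
{
  /* bfloat16 is the high half of an IEEE-754 binary32; the low half is zero */
  uint32_t u = (uint32_t)b << 16;
  float f;
  memcpy(&f, &u, sizeof f);
  return f;
}

static void loadNHi_mult_ref(float *out, const uint16_t *a, const float *x,
                             int n, int lanes)
{
  /* out[k] = (float)a[k] * x[k] for the first n lanes, zero elsewhere */
  for (int k = 0; k < lanes; ++k)
    out[k] = (k < n) ? bf16_to_f32_ref(a[k]) * x[k] : 0.0f;
}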