@@ -55,14 +55,14 @@ static void BF16GEMV_T_VSX_1(BLASLONG n, BLASLONG lda, IFLOAT *ap, IFLOAT *x, FL
55
55
BLASLONG i = 0 ;
56
56
57
57
for (; i < n8 ; i ++ ) {
58
- vec_load_vec2 (v_x , i , inp , zero );
58
+ vec_load_vec2 (& v_x [ i ] , inp , zero );
59
59
60
60
temp0 += vec_load_mult (& va0 [i ], inp , zero );
61
61
}
62
62
63
63
n &= 7 ;
64
64
if (n > 4 ) {
65
- vec_loadN_vec2 (v_x , i , inp , n , zero );
65
+ vec_loadN_vec2 (& v_x [ i ] , inp , n , zero );
66
66
67
67
temp0 += vec_loadN_mult (& va0 [i ], inp , n , zero );
68
68
} else if (n ) {
@@ -92,15 +92,15 @@ static void BF16GEMV_T_VSX_2(BLASLONG n, BLASLONG lda, IFLOAT *ap, IFLOAT *x, FL
92
92
BLASLONG i = 0 ;
93
93
94
94
for (; i < n8 ; i ++ ) {
95
- vec_load_vec2 (v_x , i , inp , zero );
95
+ vec_load_vec2 (& v_x [ i ] , inp , zero );
96
96
97
97
temp0 += vec_load_mult (& va0 [i ], inp , zero );
98
98
temp1 += vec_load_mult (& va1 [i ], inp , zero );
99
99
}
100
100
101
101
n &= 7 ;
102
102
if (n > 4 ) {
103
- vec_loadN_vec2 (v_x , i , inp , n , zero );
103
+ vec_loadN_vec2 (& v_x [ i ] , inp , n , zero );
104
104
105
105
temp0 += vec_loadN_mult (& va0 [i ], inp , n , zero );
106
106
temp1 += vec_loadN_mult (& va1 [i ], inp , n , zero );
@@ -139,7 +139,7 @@ static void BF16GEMV_T_VSX_4(BLASLONG n, BLASLONG lda, IFLOAT *ap, IFLOAT *x, FL
139
139
BLASLONG i = 0 ;
140
140
141
141
for (; i < n8 ; i ++ ) {
142
- vec_load_vec2 (v_x , i , inp , zero );
142
+ vec_load_vec2 (& v_x [ i ] , inp , zero );
143
143
144
144
temp0 += vec_load_mult (& va0 [i ], inp , zero );
145
145
temp1 += vec_load_mult (& va1 [i ], inp , zero );
@@ -149,7 +149,7 @@ static void BF16GEMV_T_VSX_4(BLASLONG n, BLASLONG lda, IFLOAT *ap, IFLOAT *x, FL
149
149
150
150
n &= 7 ;
151
151
if (n > 4 ) {
152
- vec_loadN_vec2 (v_x , i , inp , n , zero );
152
+ vec_loadN_vec2 (& v_x [ i ] , inp , n , zero );
153
153
154
154
temp0 += vec_loadN_mult (& va0 [i ], inp , n , zero );
155
155
temp1 += vec_loadN_mult (& va1 [i ], inp , n , zero );
@@ -220,7 +220,7 @@ static void BF16GEMV_T_VSX_8(BLASLONG n, BLASLONG lda, IFLOAT *ap, IFLOAT *x, FL
220
220
BLASLONG i = 0 ;
221
221
222
222
for (; i < n8 ; i ++ ) {
223
- vec_load_vec2 (v_x , i , inp , zero );
223
+ vec_load_vec2 (& v_x [ i ] , inp , zero );
224
224
225
225
temp0 += vec_load_mult (& va0 [i ], inp , zero );
226
226
temp1 += vec_load_mult (& va1 [i ], inp , zero );
@@ -234,7 +234,7 @@ static void BF16GEMV_T_VSX_8(BLASLONG n, BLASLONG lda, IFLOAT *ap, IFLOAT *x, FL
234
234
235
235
n &= 7 ;
236
236
if (n > 4 ) {
237
- vec_loadN_vec2 (v_x , i , inp , n , zero );
237
+ vec_loadN_vec2 (& v_x [ i ] , inp , n , zero );
238
238
239
239
temp0 += vec_loadN_mult (& va0 [i ], inp , n , zero );
240
240
temp1 += vec_loadN_mult (& va1 [i ], inp , n , zero );
@@ -257,7 +257,7 @@ static void BF16GEMV_T_VSX_8(BLASLONG n, BLASLONG lda, IFLOAT *ap, IFLOAT *x, FL
257
257
temp7 += vec_loadNHi_mult (& va7 [i ], inp [0 ], n , zero );
258
258
}
259
259
260
- vec_f32 t0 , t1 , t2 , t3 ;
260
+ vec_f32 t0 , t1 , t2 , t3 , t10 , t11 , t12 , t13 ;
261
261
vec_f32 a = { alpha , alpha , alpha , alpha };
262
262
vec_f32 b = { beta , beta , beta , beta };
263
263
vec_f32 * v_y = (vec_f32 * ) y ;
@@ -272,14 +272,14 @@ static void BF16GEMV_T_VSX_8(BLASLONG n, BLASLONG lda, IFLOAT *ap, IFLOAT *x, FL
272
272
temp3 = vec_mergel (t1 , t3 );
273
273
temp0 += temp1 + temp2 + temp3 ;
274
274
275
- t0 = vec_mergeh (temp4 , temp6 );
276
- t1 = vec_mergel (temp4 , temp6 );
277
- t2 = vec_mergeh (temp5 , temp7 );
278
- t3 = vec_mergel (temp5 , temp7 );
279
- temp4 = vec_mergeh (t0 , t2 );
280
- temp5 = vec_mergel (t0 , t2 );
281
- temp6 = vec_mergeh (t1 , t3 );
282
- temp7 = vec_mergel (t1 , t3 );
275
+ t10 = vec_mergeh (temp4 , temp6 );
276
+ t11 = vec_mergel (temp4 , temp6 );
277
+ t12 = vec_mergeh (temp5 , temp7 );
278
+ t13 = vec_mergel (temp5 , temp7 );
279
+ temp4 = vec_mergeh (t10 , t12 );
280
+ temp5 = vec_mergel (t10 , t12 );
281
+ temp6 = vec_mergeh (t11 , t13 );
282
+ temp7 = vec_mergel (t11 , t13 );
283
283
temp4 += temp5 + temp6 + temp7 ;
284
284
285
285
vec_load_pair (inp , v_y );
0 commit comments