@@ -122,10 +122,7 @@ FORCEINLINE void copy_x(BLASLONG n, IFLOAT *src, IFLOAT *dest, BLASLONG inc_src)
122
122
FORCEINLINE void copy_y_beta (BLASLONG n , FLOAT * src , FLOAT * dest , BLASLONG inc_src , FLOAT beta )
123
123
{
124
124
if (beta == (FLOAT )0 ) {
125
- for (BLASLONG i = 0 ; i < n ; i ++ ) {
126
- * dest ++ = (FLOAT )0 ;
127
- src += inc_src ;
128
- }
125
+ memset (dest , 0 , n * sizeof (FLOAT ));
129
126
} else if (beta == (FLOAT )1 ) {
130
127
for (BLASLONG i = 0 ; i < n ; i ++ ) {
131
128
* dest ++ = * src ;
@@ -139,13 +136,18 @@ FORCEINLINE void copy_y_beta(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_s
139
136
}
140
137
}
141
138
139
/*
 * move_y: copy n elements from src to dest.
 *
 *   n        - number of elements to copy; the loop body does not run
 *              when n <= 0.
 *   src      - source pointer, read sequentially (advanced by one element
 *              per iteration).
 *   dest     - destination pointer, written once per iteration.
 *   inc_dest - number of elements dest is advanced by after each store.
 *
 * No scaling or accumulation is applied: each destination element is
 * overwritten with the corresponding source element.
 */
+ FORCEINLINE void move_y (BLASLONG n , FLOAT * src , FLOAT * dest , BLASLONG inc_dest )
140
+ {
141
+ for (BLASLONG i = 0 ; i < n ; i ++ ) {
142
+ * dest = * src ++ ; /* plain overwrite; src moves to the next element */
143
+ dest += inc_dest ; /* dest moves by the caller-supplied stride */
144
+ }
145
+ }
146
+
142
147
FORCEINLINE void copy_y (BLASLONG n , FLOAT * src , FLOAT * dest , BLASLONG inc_src , FLOAT beta )
143
148
{
144
149
if (beta == (FLOAT )0 ) {
145
- for (BLASLONG i = 0 ; i < n ; i ++ ) {
146
- * dest = * src ++ ;
147
- dest += inc_src ;
148
- }
150
+ move_y (n , src , dest , inc_src );
149
151
} else if (beta == (FLOAT )1 ) {
150
152
for (BLASLONG i = 0 ; i < n ; i ++ ) {
151
153
* dest += * src ++ ;
@@ -159,14 +161,6 @@ FORCEINLINE void copy_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_src, F
159
161
}
160
162
}
161
163
162
/*
 * move_y: store n sequentially-read src elements into dest, stepping
 * dest by inc_dest elements after every store. Existing dest values are
 * overwritten, not accumulated into. Nothing is written when n <= 0.
 */
- FORCEINLINE void move_y (BLASLONG n , FLOAT * src , FLOAT * dest , BLASLONG inc_dest )
163
- {
164
- for (BLASLONG i = 0 ; i < n ; i ++ ) {
165
- * dest = * src ++ ; /* src is read with unit stride */
166
- dest += inc_dest ; /* dest is written with stride inc_dest */
167
- }
168
- }
169
-
170
164
static void BF16GEMV_N_beta (BLASLONG n , FLOAT * output_vector , FLOAT * input_vector , FLOAT beta )
171
165
{
172
166
if (beta == (FLOAT )0 ) {
0 commit comments