@@ -26,7 +26,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
26
*****************************************************************************/
27
27
28
28
29
-
30
29
/* Feature marker for the 4x8 SGEMV micro-kernel declared below.
 * NOTE(review): presumably tested with #ifdef by the generic driver that
 * includes this file -- confirm against the includer. */
#define HAVE_KERNEL_4x8 1
31
30
/* Forward declaration of the 4x8 micro-kernel.
 *
 * Parameters (types as declared; semantics only where the definition in
 * this file shows them):
 *   n     - BLASLONG count passed to the kernel.
 *   ap    - array of FLOAT pointers (presumably the matrix column
 *           pointers -- TODO confirm against the caller).
 *   x, y  - FLOAT vectors.
 *   lda4  - BLASLONG stride value; the asm body labels it "lda" when it
 *           saves and restores the operand.
 *   alpha - pointer to the scalar; the asm body broadcasts *alpha into a
 *           vector register.
 *
 * The noinline attribute asks the compiler to keep this as a standalone
 * function rather than merging the asm body into its callers. */
static void sgemv_kernel_4x8 ( BLASLONG n , FLOAT * * ap , FLOAT * x , FLOAT * y , BLASLONG lda4 , FLOAT * alpha ) __attribute__ ((noinline ));
32
31
@@ -49,6 +48,8 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
49
48
50
49
"vbroadcastss (%9), %%ymm6 \n\t" // alpha
51
50
51
+ "movq %8, %%xmm10 \n\t" //save lda
52
+
52
53
"testq $0x04, %1 \n\t"
53
54
"jz 2f \n\t"
54
55
@@ -151,6 +152,7 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
151
152
152
153
"4: \n\t"
153
154
"vzeroupper \n\t"
155
+ "movq %%xmm10, %8 \n\t" //restore lda
154
156
155
157
:
156
158
"+r" (i ), // 0
@@ -170,14 +172,14 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
170
172
"%xmm4" , "%xmm5" ,
171
173
"%xmm6" , "%xmm7" ,
172
174
"%xmm8" , "%xmm9" ,
175
+ "%xmm10" ,
173
176
"%xmm12" , "%xmm13" , "%xmm14" , "%xmm15" ,
174
177
"memory"
175
178
);
176
179
177
180
}
178
181
179
182
180
-
181
183
/* Feature marker for the 4x4 SGEMV micro-kernel declared below.
 * NOTE(review): presumably tested with #ifdef by the generic driver that
 * includes this file -- confirm against the includer. */
#define HAVE_KERNEL_4x4 1
182
184
/* Forward declaration of the 4x4 micro-kernel.
 *
 * Parameters (types as declared; semantics only where the definition in
 * this file shows them):
 *   n     - BLASLONG count passed to the kernel.
 *   ap    - array of FLOAT pointers (presumably the matrix column
 *           pointers -- TODO confirm against the caller).
 *   x, y  - FLOAT vectors.
 *   alpha - pointer to the scalar; the asm body broadcasts *alpha into a
 *           vector register.
 *
 * The noinline attribute asks the compiler to keep this as a standalone
 * function rather than merging the asm body into its callers. */
static void sgemv_kernel_4x4 ( BLASLONG n , FLOAT * * ap , FLOAT * x , FLOAT * y , FLOAT * alpha ) __attribute__ ((noinline ));
183
185
@@ -196,6 +198,7 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
196
198
197
199
"vbroadcastss (%8), %%ymm6 \n\t" // alpha
198
200
201
+
199
202
"testq $0x04, %1 \n\t"
200
203
"jz 2f \n\t"
201
204
0 commit comments