Skip to content

Commit 874df65

Browse files
authored
Fix incorrect sgemv results for IBM z14
Part of PR #1993 that was inadvertently misplaced into the top-level directory.
1 parent 1f4b61f commit 874df65

File tree

1 file changed

+22
-38
lines changed

1 file changed

+22
-38
lines changed

kernel/zarch/sgemv_t_4.c

Lines changed: 22 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -158,32 +158,24 @@ static void sgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
158158
"brctg %%r0,2b \n\t"
159159

160160
"3: \n\t"
161-
"vrepf %%v4,%%v0,1 \n\t"
162-
"aebr %%f0,%%f4 \n\t"
163-
"vrepf %%v4,%%v0,2 \n\t"
164-
"aebr %%f0,%%f4 \n\t"
165-
"vrepf %%v4,%%v0,3 \n\t"
161+
"veslg %%v4,%%v0,32 \n\t"
162+
"vfasb %%v0,%%v0,%%v4 \n\t"
163+
"vrepg %%v4,%%v0,1 \n\t"
166164
"aebr %%f0,%%f4 \n\t"
167165
"ste %%f0,0(%6) \n\t"
168-
"vrepf %%v4,%%v1,1 \n\t"
169-
"aebr %%f1,%%f4 \n\t"
170-
"vrepf %%v4,%%v1,2 \n\t"
171-
"aebr %%f1,%%f4 \n\t"
172-
"vrepf %%v4,%%v1,3 \n\t"
166+
"veslg %%v4,%%v1,32 \n\t"
167+
"vfasb %%v1,%%v1,%%v4 \n\t"
168+
"vrepg %%v4,%%v1,1 \n\t"
173169
"aebr %%f1,%%f4 \n\t"
174170
"ste %%f1,4(%6) \n\t"
175-
"vrepf %%v4,%%v2,1 \n\t"
176-
"aebr %%f2,%%f4 \n\t"
177-
"vrepf %%v4,%%v2,2 \n\t"
178-
"aebr %%f2,%%f4 \n\t"
179-
"vrepf %%v4,%%v2,3 \n\t"
171+
"veslg %%v4,%%v2,32 \n\t"
172+
"vfasb %%v2,%%v2,%%v4 \n\t"
173+
"vrepg %%v4,%%v2,1 \n\t"
180174
"aebr %%f2,%%f4 \n\t"
181175
"ste %%f2,8(%6) \n\t"
182-
"vrepf %%v4,%%v3,1 \n\t"
183-
"aebr %%f3,%%f4 \n\t"
184-
"vrepf %%v4,%%v3,2 \n\t"
185-
"aebr %%f3,%%f4 \n\t"
186-
"vrepf %%v4,%%v3,3 \n\t"
176+
"veslg %%v4,%%v3,32 \n\t"
177+
"vfasb %%v3,%%v3,%%v4 \n\t"
178+
"vrepg %%v4,%%v3,1 \n\t"
187179
"aebr %%f3,%%f4 \n\t"
188180
"ste %%f3,12(%6) "
189181
:
@@ -281,18 +273,14 @@ static void sgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
281273
"brctg %%r0,2b \n\t"
282274

283275
"3: \n\t"
284-
"vrepf %%v2,%%v0,1 \n\t"
285-
"aebr %%f0,%%f2 \n\t"
286-
"vrepf %%v2,%%v0,2 \n\t"
287-
"aebr %%f0,%%f2 \n\t"
288-
"vrepf %%v2,%%v0,3 \n\t"
276+
"veslg %%v2,%%v0,32 \n\t"
277+
"vfasb %%v0,%%v0,%%v2 \n\t"
278+
"vrepg %%v2,%%v0,1 \n\t"
289279
"aebr %%f0,%%f2 \n\t"
290280
"ste %%f0,0(%4) \n\t"
291-
"vrepf %%v2,%%v1,1 \n\t"
292-
"aebr %%f1,%%f2 \n\t"
293-
"vrepf %%v2,%%v1,2 \n\t"
294-
"aebr %%f1,%%f2 \n\t"
295-
"vrepf %%v2,%%v1,3 \n\t"
281+
"veslg %%v2,%%v1,32 \n\t"
282+
"vfasb %%v1,%%v1,%%v2 \n\t"
283+
"vrepg %%v2,%%v1,1 \n\t"
296284
"aebr %%f1,%%f2 \n\t"
297285
"ste %%f1,4(%4) "
298286
:
@@ -349,7 +337,7 @@ static void sgemv_kernel_4x1(BLASLONG n, FLOAT *a0, FLOAT *x, FLOAT *y)
349337

350338
"vl %%v31,112(%%r1,%1) \n\t"
351339
"vfmasb %%v0,%%v23,%%v31,%%v0 \n\t"
352-
340+
353341
"agfi %%r1,128 \n\t"
354342
"brctg %%r0,0b \n\t"
355343

@@ -370,11 +358,9 @@ static void sgemv_kernel_4x1(BLASLONG n, FLOAT *a0, FLOAT *x, FLOAT *y)
370358
"brctg %%r0,2b \n\t"
371359

372360
"3: \n\t"
373-
"vrepf %%v1,%%v0,1 \n\t"
374-
"aebr %%f0,%%f1 \n\t"
375-
"vrepf %%v1,%%v0,2 \n\t"
376-
"aebr %%f0,%%f1 \n\t"
377-
"vrepf %%v1,%%v0,3 \n\t"
361+
"veslg %%v1,%%v0,32 \n\t"
362+
"vfasb %%v0,%%v0,%%v1 \n\t"
363+
"vrepg %%v1,%%v0,1 \n\t"
378364
"aebr %%f0,%%f1 \n\t"
379365
"ste %%f0,0(%3) "
380366
:
@@ -823,5 +809,3 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
823809

824810
return(0);
825811
}
826-
827-

0 commit comments

Comments
 (0)