@@ -96,7 +96,7 @@ static void zscal_kernel_8(BLASLONG n, FLOAT da_r,FLOAT da_i, FLOAT *x) {
96
96
"clgrjl %[x_ptr],%%r0,1b \n\t"
97
97
: [mem ] "+m" (* (double (* )[2 * n ])x ) ,[x_ptr ] "+&a" (x )
98
98
: [n ] "r" (n ), [alpha_r ] "f" (da_r ),[alpha_i ] "f" (da_i )
99
- : "cc" , "memory" , " r0" ,"v16" ,"v17" ,"v18" ,"v19" ,"v20" ,"v21" ,"v22" ,"v23" ,"v24" ,"v25"
99
+ : "cc" , "r0" ,"v16" ,"v17" ,"v18" ,"v19" ,"v20" ,"v21" ,"v22" ,"v23" ,"v24" ,"v25"
100
100
);
101
101
102
102
@@ -106,10 +106,9 @@ static void zscal_kernel_8_zero_r(BLASLONG n, FLOAT da_i, FLOAT *x) {
106
106
107
107
__asm__ ( "pfd 2, 0(%1) \n\t"
108
108
"lgdr %%r0,%[alpha] \n\t"
109
- "vlvgg %%v16,%%r0,0 \n\t"
110
- "lcdbr %[alpha],%[alpha] \n\t"
111
- "lgdr %%r0,%[alpha] \n\t"
112
- "vlvgg %%v16,%%r0,1 \n\t"
109
+ "vlvgp %%v16,%%r0,%%r0 \n\t" // fill both doublewords of v16 from r0 (the bits of alpha)
110
+ "vflcdb %%v16,%%v16 \n\t" // negate both double elements: v16 = {-alpha, -alpha}
111
+ "vlvgg %%v16,%%r0,0 \n\t" // rewrite element 0 from r0, leaving v16 = {alpha, -alpha}
113
112
"vlr %%v17 ,%%v16 \n\t"
114
113
"sllg %%r0,%[n],4 \n\t"
115
114
"agr %%r0,%[x_ptr] \n\t"
@@ -129,8 +128,8 @@ static void zscal_kernel_8_zero_r(BLASLONG n, FLOAT da_i, FLOAT *x) {
129
128
"vsteg %%v26, 40(%[x_ptr]),0 \n\t"
130
129
"vl %%v27, 48(%[x_ptr]) \n\t"
131
130
"vfmdb %%v27,%%v27,%%v17 \n\t"
132
- "vsteg %%v27, 40 (%[x_ptr]),1 \n\t"
133
- "vsteg %%v27, 48 (%[x_ptr]),0 \n\t"
131
+ "vsteg %%v27, 48 (%[x_ptr]),1 \n\t"
132
+ "vsteg %%v27, 56 (%[x_ptr]),0 \n\t"
134
133
"vl %%v28, 64(%[x_ptr]) \n\t"
135
134
"vfmdb %%v28,%%v28,%%v16 \n\t"
136
135
"vsteg %%v28, 64(%[x_ptr]),1 \n\t"
@@ -141,8 +140,8 @@ static void zscal_kernel_8_zero_r(BLASLONG n, FLOAT da_i, FLOAT *x) {
141
140
"vsteg %%v29, 88(%[x_ptr]),0 \n\t"
142
141
"vl %%v30, 96(%[x_ptr]) \n\t"
143
142
"vfmdb %%v30,%%v30,%%v16 \n\t"
144
- "vsteg %%v27 , 96(%[x_ptr]),1 \n\t"
145
- "vsteg %%v27 , 104(%[x_ptr]),0 \n\t"
143
+ "vsteg %%v30 , 96(%[x_ptr]),1 \n\t"
144
+ "vsteg %%v30 , 104(%[x_ptr]),0 \n\t"
146
145
"vl %%v31, 112(%[x_ptr]) \n\t"
147
146
"vfmdb %%v31,%%v31,%%v17 \n\t"
148
147
"vsteg %%v31, 112(%[x_ptr]),1 \n\t"
0 commit comments