Skip to content

Commit 3918d85

Browse files
committed
nrm2 simple optimization
1 parent 943372b commit 3918d85

File tree

1 file changed

+3 -17 lines changed

kernel/riscv64/nrm2_rvv.c

Lines changed: 3 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -39,9 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3939
#define VFMACCVV_FLOAT vfmacc_vv_f32m8
4040
#define VFMVVF_FLOAT vfmv_v_f_f32m8
4141
#define VFMVVF_FLOAT_M1 vfmv_v_f_f32m1
42-
#define VFREDMAXVS_FLOAT vfredmax_vs_f32m8_f32m1
4342
#define VFMVFS_FLOAT_M1 vfmv_f_s_f32m1_f32
44-
#define VFABSV_FLOAT vfabs_v_f32m8
4543
#define ABS fabsf
4644
#else
4745
#define VSETVL(n) vsetvl_e64m8(n)
@@ -54,9 +52,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5452
#define VFMACCVV_FLOAT vfmacc_vv_f64m8
5553
#define VFMVVF_FLOAT vfmv_v_f_f64m8
5654
#define VFMVVF_FLOAT_M1 vfmv_v_f_f64m1
57-
#define VFREDMAXVS_FLOAT vfredmax_vs_f64m8_f64m1
5855
#define VFMVFS_FLOAT_M1 vfmv_f_s_f64m1_f64
59-
#define VFABSV_FLOAT vfabs_v_f64m8
6056
#define ABS fabs
6157
#endif
6258

@@ -68,12 +64,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
6864
if(n == 1) return (ABS(x[0]));
6965

7066
FLOAT_V_T vr, v0;
71-
FLOAT_V_T_M1 v_max, v_res;
72-
FLOAT scale = 0.0, ssq = 0.0;
67+
FLOAT_V_T_M1 v_res;
68+
FLOAT ssq = 0.0;
7369

7470
size_t vlmax = VSETVL_MAX;
7571
v_res = VFMVVF_FLOAT_M1(0, vlmax);
76-
v_max = VFMVVF_FLOAT_M1(0, vlmax);
7772

7873
vr = VFMVVF_FLOAT(0, vlmax);
7974

@@ -83,9 +78,6 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
8378
vl = VSETVL(n);
8479

8580
v0 = VLEV_FLOAT(x, vl);
86-
v0 = VFABSV_FLOAT(v0, vl);
87-
88-
v_max = VFREDMAXVS_FLOAT(v_max, v0, v_max, vl);
8981

9082
vr = VFMACCVV_FLOAT(vr, v0, v0, vl);
9183
}
@@ -98,20 +90,14 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
9890
vl = VSETVL(n);
9991

10092
v0 = VLSEV_FLOAT(x, stride_x, vl);
101-
v0 = VFABSV_FLOAT(v0, vl);
102-
103-
v_max = VFREDMAXVS_FLOAT(v_max, v0, v_max, vl);
10493

10594
vr = VFMACCVV_FLOAT(vr, v0, v0, vl);
10695
}
107-
10896
}
10997

11098
v_res = VFREDSUM_FLOAT(v_res, vr, v_res, vlmax);
11199

112100
ssq = VFMVFS_FLOAT_M1(v_res);
113-
scale = VFMVFS_FLOAT_M1(v_max);
114-
ssq = ssq / (scale*scale);
115101

116-
return(scale * sqrt(ssq));
102+
return sqrt(ssq);
117103
}

0 commit comments

Comments
 (0)