Skip to content

Commit 6d8a273

Browse files
authored
Handle zero increment(s) in C910V ?AXPBY (#4483)
* Handle zero increment(s)
1 parent dbcf4f8 commit 6d8a273

File tree

2 files changed

+135
-0
lines changed

2 files changed

+135
-0
lines changed

kernel/riscv64/axpby_vector.c

Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,63 @@ int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *
6969

7070
BLASLONG stride_x, stride_y, ix = 0, iy = 0;
7171

72+
if (inc_x == 0 || inc_y == 0) { /* use trivial non-vectorized loop if either increment is zero */
73+
74+
if ( beta == 0.0 )
75+
{
76+
77+
if ( alpha == 0.0 )
78+
{
79+
while(i < n)
80+
{
81+
y[iy] = 0.0 ;
82+
iy += inc_y ;
83+
i++ ;
84+
}
85+
}
86+
else
87+
{
88+
while(i < n)
89+
{
90+
y[iy] = alpha * x[ix] ;
91+
ix += inc_x ;
92+
iy += inc_y ;
93+
i++ ;
94+
}
95+
96+
97+
}
98+
99+
}
100+
else
101+
{
102+
103+
if ( alpha == 0.0 )
104+
{
105+
while(i < n)
106+
{
107+
y[iy] = beta * y[iy] ;
108+
iy += inc_y ;
109+
i++ ;
110+
}
111+
}
112+
else
113+
{
114+
while(i < n)
115+
{
116+
y[iy] = alpha * x[ix] + beta * y[iy] ;
117+
ix += inc_x ;
118+
iy += inc_y ;
119+
i++ ;
120+
}
121+
}
122+
123+
}
124+
125+
return(0);
126+
127+
} else { /* vectorized approach for non-zero increments */
128+
72129
if(beta == 0.0){
73130
if(alpha == 0.0){//alpha == 0 && beta == 0
74131
if(inc_y == 1){
@@ -381,5 +438,6 @@ int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *
381438
}
382439
}
383440
return(0);
441+
}
384442
}
385443

kernel/riscv64/zaxpby_vector.c

Lines changed: 77 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,82 @@ int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FL
6262
stride_x = inc_x * 2 * sizeof(FLOAT);
6363
stride_y = inc_y * 2 * sizeof(FLOAT);
6464

65+
if (inc_x == 0 || inc_y == 0) {
66+
67+
FLOAT temp;
68+
BLASLONG inc_x2, inc_y2;
69+
70+
inc_x2 = 2 * inc_x;
71+
inc_y2 = 2 * inc_y;
72+
73+
if ( beta_r == 0.0 && beta_i == 0.0)
74+
{
75+
if ( alpha_r == 0.0 && alpha_i == 0.0 )
76+
{
77+
78+
while(i < n)
79+
{
80+
y[iy] = 0.0 ;
81+
y[iy+1] = 0.0 ;
82+
iy += inc_y2 ;
83+
i++ ;
84+
}
85+
86+
}
87+
else
88+
{
89+
90+
while(i < n)
91+
{
92+
y[iy] = ( alpha_r * x[ix] - alpha_i * x[ix+1] ) ;
93+
y[iy+1] = ( alpha_r * x[ix+1] + alpha_i * x[ix] ) ;
94+
ix += inc_x2 ;
95+
iy += inc_y2 ;
96+
i++ ;
97+
}
98+
99+
100+
}
101+
102+
}
103+
else
104+
{
105+
if ( alpha_r == 0.0 && alpha_i == 0.0 )
106+
{
107+
108+
while(i < n)
109+
{
110+
temp = ( beta_r * y[iy] - beta_i * y[iy+1] ) ;
111+
y[iy+1] = ( beta_r * y[iy+1] + beta_i * y[iy] ) ;
112+
y[iy] = temp;
113+
iy += inc_y2 ;
114+
i++ ;
115+
}
116+
117+
}
118+
else
119+
{
120+
121+
while(i < n)
122+
{
123+
temp = ( alpha_r * x[ix] - alpha_i * x[ix+1] ) + ( beta_r * y[iy] - beta_i * y[iy+1] ) ;
124+
y[iy+1] = ( alpha_r * x[ix+1] + alpha_i * x[ix] ) + ( beta_r * y[iy+1] + beta_i * y[iy] ) ;
125+
y[iy] = temp;
126+
ix += inc_x2 ;
127+
iy += inc_y2 ;
128+
i++ ;
129+
}
130+
131+
132+
}
133+
134+
135+
136+
}
137+
return(0);
138+
139+
} else {
140+
65141
if(beta_r == 0.0 && beta_i == 0.0){
66142
if(alpha_r == 0.0 && alpha_i == 0.0){
67143
if(inc_y == 1){
@@ -191,5 +267,6 @@ int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FL
191267
}
192268
}
193269
return(0);
270+
}
194271
}
195272

0 commit comments

Comments
 (0)