Skip to content

Commit ab7f466

Browse files
authored
Merge pull request #106 from xianyi/develop
rebase
2 parents 336e354 + 909068f commit ab7f466

File tree

9 files changed

+291
-150
lines changed

9 files changed

+291
-150
lines changed

Makefile.x86_64

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,10 @@ ifndef NO_AVX2
7474
ifeq ($(C_COMPILER), GCC)
7575
# AVX2 support was added in 4.7.0
7676
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
77+
GCCVERSIONGTEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 5)
7778
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
78-
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
79+
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
80+
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
7981
CCOMMON_OPT += -mavx2
8082
endif
8183
else
@@ -86,8 +88,14 @@ endif
8688
ifeq ($(F_COMPILER), GFORTRAN)
8789
# AVX2 support was added in 4.7.0
8890
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
91+
GCCVERSIONGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5)
8992
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
90-
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
93+
# Three-character flag string: (major >= 5)(major >= 4)(minor >= 7), each 0 or 1.
# Must reference GCCMINORVERSIONGTEQ7 exactly as defined above; an undefined
# name expands to empty and the filter against 011/110/111 would never match.
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
94+
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
95+
FCOMMON_OPT += -mavx2
96+
endif
97+
else
98+
ifeq ($(F_COMPILER), FLANG)
9199
FCOMMON_OPT += -mavx2
92100
endif
93101
endif

kernel/power/KERNEL.POWER10

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,9 @@ CAXPYKERNEL = caxpy.c
150150
endif
151151
ZAXPYKERNEL = zaxpy_power10.c
152152
#
153-
SCOPYKERNEL = scopy.c
153+
SCOPYKERNEL = scopy_power10.c
154154
DCOPYKERNEL = dcopy_power10.c
155-
CCOPYKERNEL = ccopy.c
155+
CCOPYKERNEL = ccopy_power10.c
156156
ZCOPYKERNEL = zcopy_power10.c
157157
#
158158
SDOTKERNEL = sdot.c

kernel/power/ccopy_power10.c

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
/***************************************************************************
2+
Copyright (c) 2013-2016, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include "common.h"
29+
30+
#if defined(__VEC__) || defined(__ALTIVEC__)
31+
#include "copy_microk_power10.c"
32+
#endif
33+
34+
#ifndef HAVE_KERNEL
35+
36+
/*
 * Portable fallback for copy_kernel, compiled only when HAVE_KERNEL has not
 * been defined by an included micro-kernel.
 *
 * Copies x to y in groups of eight FLOATs. The element counter advances by
 * four per group, i.e. one element is two consecutive FLOATs. The loop runs
 * while the counter is below n, so n is expected to be a multiple of four
 * (the caller passes a multiple of 64).
 *
 * Each group is read completely before any of it is written.
 */
static void copy_kernel(BLASLONG n, FLOAT *x, FLOAT *y)
{
	FLOAT *src = x;
	FLOAT *dst = y;
	BLASLONG done;

	for (done = 0; done < n; done += 4, src += 8, dst += 8) {
		FLOAT group[8];
		int k;

		/* Load the whole group first ... */
		for (k = 0; k < 8; k++)
			group[k] = src[k];

		/* ... then store it. */
		for (k = 0; k < 8; k++)
			dst[k] = group[k];
	}
}
73+
74+
75+
#endif
76+
77+
78+
79+
/*
 * Copy n two-FLOAT elements from x to y (presumably the real/imaginary parts
 * of complex values, going by the ccopy file name).
 *
 * n      number of elements; nothing is done when n <= 0
 * x      source vector
 * inc_x  source stride, in elements
 * y      destination vector
 * inc_y  destination stride, in elements
 *
 * Always returns 0.
 */
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{
	BLASLONG i = 0;
	BLASLONG ix = 0, iy = 0;

	if (n <= 0)
		return 0;

	if ((inc_x == 1) && (inc_y == 1)) {
		/* Hand the largest leading multiple of 64 elements to copy_kernel. */
		BLASLONG n1 = n & -64;

		if (n1 > 0) {
			copy_kernel(n1, x, y);
			i = n1;
			/* FLOAT offsets: two FLOATs per element. */
			ix = n1 * 2;
			iy = n1 * 2;
		}

		/* Remaining (fewer than 64) elements, one at a time. */
		while (i < n) {
			/* Read through ix (was x[iy]); same value here, but consistent. */
			y[iy]     = x[ix];
			y[iy + 1] = x[ix + 1];
			ix += 2;
			iy += 2;
			i++;
		}
	} else {
		/* Strided case: convert element strides to FLOAT strides. */
		BLASLONG inc_x2 = 2 * inc_x;
		BLASLONG inc_y2 = 2 * inc_y;

		while (i < n) {
			y[iy]     = x[ix];
			y[iy + 1] = x[ix + 1];
			ix += inc_x2;
			iy += inc_y2;
			i++;
		}
	}

	return 0;
}
131+
132+

kernel/power/dcopy_microk_power10.c renamed to kernel/power/copy_microk_power10.c

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
2525
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626
*****************************************************************************/
2727

28-
#define HAVE_KERNEL_64 1
28+
#define HAVE_KERNEL 1
2929

30-
static void dcopy_kernel_64 (long n, double *x, double *y)
30+
static void copy_kernel (BLASLONG n, FLOAT *x, FLOAT *y)
3131
{
3232
__asm__
3333
(
@@ -49,8 +49,13 @@ static void dcopy_kernel_64 (long n, double *x, double *y)
4949
"lxvp 60, 448(%2) \n\t"
5050
"lxvp 62, 480(%2) \n\t"
5151
"addi %2, %2, 512 \n\t"
52-
52+
#if !defined(COMPLEX) && !defined(DOUBLE)
53+
"addic. %1, %1, -128 \n\t"
54+
#elif defined(COMPLEX) && defined(DOUBLE)
55+
"addic. %1, %1, -32 \n\t"
56+
#else
5357
"addic. %1, %1, -64 \n\t"
58+
#endif
5459
"ble two%= \n\t"
5560

5661
".align 5 \n"
@@ -94,7 +99,13 @@ static void dcopy_kernel_64 (long n, double *x, double *y)
9499
"addi %3, %3, 512 \n\t"
95100
"addi %2, %2, 512 \n\t"
96101

102+
#if !defined(COMPLEX) && !defined(DOUBLE)
103+
"addic. %1, %1, -128 \n\t"
104+
#elif defined(COMPLEX) && defined(DOUBLE)
105+
"addic. %1, %1, -32 \n\t"
106+
#else
97107
"addic. %1, %1, -64 \n\t"
108+
#endif
98109
"bgt one%= \n"
99110

100111
"two%=: \n\t"
@@ -121,7 +132,7 @@ static void dcopy_kernel_64 (long n, double *x, double *y)
121132
"=m" (*y),
122133
"+r" (n), // 1
123134
"+b" (x), // 2
124-
"+b" (y) // 3
135+
"+b" (y) // 3
125136
:
126137
"m" (*x)
127138
:

kernel/power/dcopy_power10.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2828
#include "common.h"
2929

3030
#if defined(__VEC__) || defined(__ALTIVEC__)
31-
#include "dcopy_microk_power10.c"
31+
#include "copy_microk_power10.c"
3232
#endif
3333

34-
#ifndef HAVE_KERNEL_64
34+
#ifndef HAVE_KERNEL
3535

36-
static void dcopy_kernel_64(BLASLONG n, FLOAT *x, FLOAT *y)
36+
static void copy_kernel(BLASLONG n, FLOAT *x, FLOAT *y)
3737
{
3838

3939
BLASLONG i=0;
@@ -89,7 +89,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
8989
BLASLONG n1 = n & -64;
9090
if ( n1 > 0 )
9191
{
92-
dcopy_kernel_64(n1, x, y);
92+
copy_kernel(n1, x, y);
9393
i=n1;
9494
}
9595

kernel/power/scopy_power10.c

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/***************************************************************************
2+
Copyright (c) 2013-2016, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include "common.h"
29+
30+
#if defined(__VEC__) || defined(__ALTIVEC__)
31+
#include "copy_microk_power10.c"
32+
#endif
33+
34+
#ifndef HAVE_KERNEL
35+
36+
/*
 * Portable fallback for copy_kernel, compiled only when HAVE_KERNEL has not
 * been defined by an included micro-kernel.
 *
 * Copies x to y eight FLOATs at a time, advancing the counter by eight per
 * group. The loop runs while the counter is below n, so n is expected to be
 * a multiple of eight (the caller passes a multiple of 128).
 *
 * Each group is read completely before any of it is written.
 */
static void copy_kernel (BLASLONG n, FLOAT *x, FLOAT *y)
{
	FLOAT *src = x;
	FLOAT *dst = y;
	BLASLONG done;

	for (done = 0; done < n; done += 8, src += 8, dst += 8) {
		FLOAT group[8];
		int k;

		/* Load the whole group first ... */
		for (k = 0; k < 8; k++)
			group[k] = src[k];

		/* ... then store it. */
		for (k = 0; k < 8; k++)
			dst[k] = group[k];
	}
}
73+
74+
75+
#endif
76+
77+
78+
79+
/*
 * Copy n FLOATs from x (stride inc_x) to y (stride inc_y).
 *
 * Does nothing when n <= 0. With both strides equal to 1, the leading
 * multiple of 128 elements is handed to copy_kernel and the remainder is
 * copied element by element; otherwise a plain strided loop is used.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{
	BLASLONG done = 0;
	BLASLONG bulk;

	if (n <= 0)
		return 0;

	if (inc_x != 1 || inc_y != 1) {
		/* General strides: walk both vectors with independent offsets. */
		BLASLONG src = 0;
		BLASLONG dst = 0;

		for (; done < n; done++) {
			y[dst] = x[src];
			src += inc_x;
			dst += inc_y;
		}
		return 0;
	}

	/* Contiguous case: bulk part via copy_kernel, then the tail. */
	bulk = n & -128;
	if (bulk > 0) {
		copy_kernel (bulk, x, y);
		done = bulk;
	}

	for (; done < n; done++)
		y[done] = x[done];

	return 0;
}
122+
123+

0 commit comments

Comments
 (0)