Skip to content

Commit 39bf8ec

Browse files
authored
Merge pull request #4340 from yinshiyou/la-dev
Add some refinements and optimizations for LoongArch.
2 parents 42b5e08 + 9fe07d8 commit 39bf8ec

File tree

10 files changed

+786
-26
lines changed

10 files changed

+786
-26
lines changed

benchmark/trsv.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ int main(int argc, char *argv[]){
127127
long long muls = n*(n+1)/2.0;
128128
long long adds = (n - 1.0)*n/2.0;
129129

130-
fprintf(stderr, "%10d %10.2f MFlops %10.6f sec\n", n,(muls+adds) / timeg * 1.e-6, timeg);
130+
fprintf(stderr, "%10d : %10.2f MFlops %10.6f sec\n", n,(muls+adds) / timeg * 1.e-6, timeg);
131131
if(a != NULL){
132132
free(a);
133133
}

c_check

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,7 @@ if [ "$architecture" = "loongarch64" ]; then
199199
tmpd="$(mktemp -d)"
200200
tmplsx="$tmpd/lsx.c"
201201
codelsx='"vadd.b $vr0, $vr0, $vr0"'
202-
lsx_flags='-march=loongarch64 -mlsx'
203-
printf "#include <lsxintrin.h>\n\n" >> "$tmplsx"
202+
lsx_flags='-march=loongarch64'
204203
printf "void main(void){ __asm__ volatile(%s);}\n" "$codelsx" >> "$tmplsx"
205204
args="$lsx_flags -o $tmplsx.o $tmplsx"
206205
{
@@ -211,8 +210,7 @@ if [ "$architecture" = "loongarch64" ]; then
211210

212211
tmplasx="$tmpd/lasx.c"
213212
codelasx='"xvadd.b $xr0, $xr0, $xr0"'
214-
lasx_flags='-march=loongarch64 -mlasx'
215-
printf "#include <lasxintrin.h>\n\n" >> "$tmplasx"
213+
lasx_flags='-march=loongarch64'
216214
printf "void main(void){ __asm__ volatile(%s);}\n" "$codelasx" >> "$tmplasx"
217215
args="$lasx_flags -o $tmplasx.o $tmplasx"
218216
{

c_check.pl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,7 @@
241241
} else {
242242
$tmplsx = new File::Temp( SUFFIX => '.c' , UNLINK => 1 );
243243
$codelsx = '"vadd.b $vr0, $vr0, $vr0"';
244-
$lsx_flags = "-march=loongarch64 -mlsx";
245-
print $tmplsx "#include <lsxintrin.h>\n\n";
244+
$lsx_flags = "-march=loongarch64";
246245
print $tmplsx "void main(void){ __asm__ volatile($codelsx); }\n";
247246

248247
$args = "$lsx_flags -o $tmplsx.o $tmplsx";
@@ -257,8 +256,7 @@
257256

258257
$tmplasx = new File::Temp( SUFFIX => '.c' , UNLINK => 1 );
259258
$codelasx = '"xvadd.b $xr0, $xr0, $xr0"';
260-
$lasx_flags = "-march=loongarch64 -mlasx";
261-
print $tmplasx "#include <lasxintrin.h>\n\n";
259+
$lasx_flags = "-march=loongarch64";
262260
print $tmplasx "void main(void){ __asm__ volatile($codelasx); }\n";
263261

264262
$args = "$lasx_flags -o $tmplasx.o $tmplasx";

common_loongarch64.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,17 @@ static inline int WhereAmI(void){
124124
#define CMPLE fcmp.cle.d
125125
#define CMPLT fcmp.clt.d
126126
#define NEG fneg.d
127+
128+
#define XVFSUB xvfsub.d
129+
#define XVFADD xvfadd.d
130+
#define XVFMADD xvfmadd.d
131+
132+
#define VFSUB vfsub.d
133+
#define VFADD vfadd.d
134+
#define VFMADD vfmadd.d
135+
127136
#else
137+
128138
#define LD fld.s
129139
#define ST fst.s
130140
#define MADD fmadd.s
@@ -142,6 +152,15 @@ static inline int WhereAmI(void){
142152
#define CMPLE fcmp.cle.s
143153
#define CMPLT fcmp.clt.s
144154
#define NEG fneg.s
155+
156+
#define XVFSUB xvfsub.s
157+
#define XVFADD xvfadd.s
158+
#define XVFMADD xvfmadd.s
159+
160+
#define VFSUB vfsub.s
161+
#define VFADD vfadd.s
162+
#define VFMADD vfmadd.s
163+
145164
#endif /* defined(DOUBLE) */
146165

147166
#if defined(__64BIT__) && defined(USE64BITINT)

cpuid_loongarch64.c

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4747
#define CPU_LOONGSON3R5 1
4848
#define CPU_LOONGSON2K1000 2
4949

50-
#define LA_HWCAP_LSX (1<<4)
51-
#define LA_HWCAP_LASX (1<<5)
50+
#define LA_HWCAP_LSX (1U << 4)
51+
#define LA_HWCAP_LASX (1U << 5)
5252

5353
static char *cpuname[] = {
5454
"LOONGSONGENERIC",
@@ -64,11 +64,11 @@ static char *cpuname_lower[] = {
6464

6565
int detect(void) {
6666
#ifdef __linux
67-
int flag = (int)getauxval(AT_HWCAP);
67+
int hwcap = (int)getauxval(AT_HWCAP);
6868

69-
if (flag & LA_HWCAP_LASX)
69+
if (hwcap & LA_HWCAP_LASX)
7070
return CPU_LOONGSON3R5;
71-
else if (flag & LA_HWCAP_LSX)
71+
else if (hwcap & LA_HWCAP_LSX)
7272
return CPU_LOONGSON2K1000;
7373
else
7474
return CPU_GENERIC;
@@ -94,7 +94,9 @@ void get_subdirname(void) {
9494
}
9595

9696
void get_cpuconfig(void) {
97+
uint32_t hwcaps = 0;
9798
int d = detect();
99+
98100
switch (d) {
99101
case CPU_LOONGSON3R5:
100102
printf("#define LOONGSON3R5\n");
@@ -129,6 +131,10 @@ void get_cpuconfig(void) {
129131
printf("#define L2_ASSOCIATIVE 16\n");
130132
break;
131133
}
134+
135+
hwcaps = (uint32_t)getauxval( AT_HWCAP );
136+
if (hwcaps & LA_HWCAP_LSX) printf("#define HAVE_LSX\n");
137+
if (hwcaps & LA_HWCAP_LASX) printf("#define HAVE_LASX\n");
132138
}
133139

134140
void get_libname(void){

driver/others/dynamic_loongarch64.c

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
2525
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2626
*******************************************************************************/
2727

28+
#include <sys/auxv.h>
2829
#include "common.h"
2930

3031
extern gotoblas_t gotoblas_LOONGSON3R5;
@@ -74,21 +75,15 @@ static gotoblas_t *force_coretype(char *coretype) {
7475
return NULL;
7576
}
7677

77-
#define LASX_MASK 1<<7
78-
#define LSX_MASK 1<<6
79-
#define LOONGARCH_CFG2 0x02
78+
#define LA_HWCAP_LSX (1U << 4)
79+
#define LA_HWCAP_LASX (1U << 5)
8080

8181
static gotoblas_t *get_coretype(void) {
82-
int ret = 0;
83-
__asm__ volatile (
84-
"cpucfg %0, %1 \n\t"
85-
: "+&r"(ret)
86-
: "r"(LOONGARCH_CFG2)
87-
);
88-
89-
if (ret & LASX_MASK)
82+
int hwcap = (int)getauxval(AT_HWCAP);
83+
84+
if (hwcap & LA_HWCAP_LASX)
9085
return &gotoblas_LOONGSON3R5;
91-
else if (ret & LSX_MASK)
86+
else if (hwcap & LA_HWCAP_LSX)
9287
return &gotoblas_LOONGSON2K1000;
9388
else
9489
return &gotoblas_LOONGSONGENERIC;
kernel/loongarch64/KERNEL.LOONGSON2K1000

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
ifndef NO_LSX
2+
3+
SDOTKERNEL = dot_lsx.S
4+
DSDOTKERNEL = dot_lsx.S
5+
DDOTKERNEL = dot_lsx.S
6+
7+
endif

kernel/loongarch64/KERNEL.LOONGSON3R5

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
ifndef NO_LASX
2+
3+
SDOTKERNEL = dot_lasx.S
4+
DSDOTKERNEL = dot_lasx.S
5+
DDOTKERNEL = dot_lasx.S
6+
27
DGEMMKERNEL = dgemm_kernel_16x4.S
38
DGEMMINCOPY = dgemm_ncopy_16.S
49
DGEMMITCOPY = dgemm_tcopy_16.S

0 commit comments

Comments
 (0)