Skip to content

Commit f5d0431

Browse files
authored
Merge branch 'OpenMathLib:develop' into scalfixes
2 parents 73f8866 + a815594 commit f5d0431

File tree

12 files changed

+1093
-840
lines changed

12 files changed

+1093
-840
lines changed

.github/workflows/riscv64_vector.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ jobs:
2828
- target: RISCV64_ZVL256B
2929
opts: TARGET=RISCV64_ZVL256B BINARY=64 ARCH=riscv64
3030
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64
31+
- target: DYNAMIC_ARCH=1
32+
opts: TARGET=RISCV64_GENERIC BINARY=64 ARCH=riscv64 DYNAMIC_ARCH=1
33+
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64
3134

3235
steps:
3336
- name: Checkout repository

Makefile.system

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,17 @@ ifeq ($(ARCH), loongarch64)
715715
DYNAMIC_CORE = LOONGSON3R5 LOONGSON2K1000 LOONGSONGENERIC
716716
endif
717717

718+
ifeq ($(ARCH), riscv64)
719+
DYNAMIC_CORE = RISCV64_GENERIC
720+
DYNAMIC_CORE += RISCV64_ZVL128B
721+
DYNAMIC_CORE += RISCV64_ZVL256B
722+
ifdef DYNAMIC_LIST
723+
override DYNAMIC_CORE = RISCV64_GENERIC $(DYNAMIC_LIST)
724+
XCCOMMON_OPT = -DDYNAMIC_LIST -DDYN_RISCV64_GENERIC
725+
XCCOMMON_OPT += $(foreach dcore,$(DYNAMIC_LIST),-DDYN_$(dcore))
726+
endif
727+
endif
728+
718729
ifeq ($(ARCH), zarch)
719730
DYNAMIC_CORE = ZARCH_GENERIC
720731

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,8 @@ For **POWER**, the list encompasses POWER6, POWER8 and POWER9. POWER10 is additi
234234

235235
On **ZARCH** it comprises Z13 and Z14 as well as generic zarch support.
236236

237+
On **riscv64**, DYNAMIC_ARCH enables support for riscv64_zvl128b and riscv64_zvl256b in addition to generic riscv64 support. A compiler that supports RVV 1.0 is required to build OpenBLAS for riscv64 when DYNAMIC_ARCH is enabled.
238+
237239
The `TARGET` option can be used in conjunction with `DYNAMIC_ARCH=1` to specify which cpu model should be assumed for all the
238240
common code in the library; usually you will want to set this to the oldest model you expect to encounter.
239241
Please note that it is not possible to combine support for different architectures, so no combined 32 and 64 bit or x86_64 and arm64 in the same library.

driver/others/Makefile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,16 @@ else
3030
ifeq ($(ARCH),loongarch64)
3131
COMMONOBJS += dynamic_loongarch64.$(SUFFIX)
3232
else
33+
ifeq ($(ARCH),riscv64)
34+
COMMONOBJS += dynamic_riscv64.$(SUFFIX) detect_riscv64.$(SUFFIX)
35+
else
3336
COMMONOBJS += dynamic.$(SUFFIX)
3437
endif
3538
endif
3639
endif
3740
endif
3841
endif
42+
endif
3943
else
4044
COMMONOBJS += parameter.$(SUFFIX)
4145
endif
@@ -106,12 +110,16 @@ else
106110
ifeq ($(ARCH),loongarch64)
107111
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_loongarch64.$(SUFFIX)
108112
else
113+
ifeq ($(ARCH),riscv64)
114+
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_riscv64.$(SUFFIX) detect_riscv64.$(SUFFIX)
115+
else
109116
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
110117
endif
111118
endif
112119
endif
113120
endif
114121
endif
122+
endif
115123
else
116124
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
117125
endif
@@ -209,6 +217,9 @@ addx.$(SUFFIX) : $(ARCH)/addx.c
209217
mulx.$(SUFFIX) : $(ARCH)/mulx.c
210218
$(CC) $(CFLAGS) -c -DXDOUBLE -UCOMPLEX $< -o $(@F)
211219

220+
detect_riscv64.$(SUFFIX): detect_riscv64.c
221+
$(CC) $(CFLAGS) -c -march=rv64imafdcv $< -o $(@F)
222+
212223
xerbla.$(PSUFFIX) : xerbla.c
213224
$(CC) $(PFLAGS) -c $< -o $(@F)
214225

driver/others/detect_riscv64.c

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*****************************************************************************
2+
Copyright (c) 2024, The OpenBLAS Project
3+
All rights reserved.
4+
5+
Redistribution and use in source and binary forms, with or without
6+
modification, are permitted provided that the following conditions are
7+
met:
8+
9+
1. Redistributions of source code must retain the above copyright
10+
notice, this list of conditions and the following disclaimer.
11+
12+
2. Redistributions in binary form must reproduce the above copyright
13+
notice, this list of conditions and the following disclaimer in
14+
the documentation and/or other materials provided with the
15+
distribution.
16+
3. Neither the name of the OpenBLAS project nor the names of
17+
its contributors may be used to endorse or promote products
18+
derived from this software without specific prior written
19+
permission.
20+
21+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
30+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31+
**********************************************************************************/
32+
33+
#include <stdint.h>
34+
35+
#ifdef __riscv_v_intrinsic
36+
#include <riscv_vector.h>
37+
#endif
38+
39+
unsigned detect_riscv64_get_vlenb(void) {
40+
#ifdef __riscv_v_intrinsic
41+
return __riscv_vlenb();
42+
#else
43+
return 0;
44+
#endif
45+
}
46+
47+
/*
48+
* Based on the approach taken here:
49+
* https://code.videolan.org/videolan/dav1d/-/merge_requests/1629
50+
*
51+
* Only to be called after we've determined we have some sort of
52+
* RVV support.
53+
*/
54+
55+
uint64_t detect_riscv64_rvv100(void)
56+
{
57+
uint64_t rvv10_supported;
58+
59+
/*
60+
* After the vsetvli instruction, vtype will be > 0 if the vsetvli
61+
* succeeded, or < 0 if it failed (the vill bit, the MSB of vtype, is set).
62+
* If 0 < vtype we're good and the function returns 1; otherwise there's
63+
* no RVV 1.0 support and we return 0.
64+
*/
65+
66+
asm volatile("vsetvli x0, x0, e8, m1, ta, ma\n\t"
67+
"csrr %0, vtype\n\t"
68+
"slt %0, x0, %0\n"
69+
: "=r" (rvv10_supported)
70+
:
71+
:);
72+
73+
return rvv10_supported;
74+
}
75+

0 commit comments

Comments
 (0)