Skip to content

Commit 92275a7

Browse files
authored
Merge pull request #3642 from nursik/develop
Add ARM64 support for Windows
2 parents 914c4d0 + 1dfc4e6 commit 92275a7

File tree

3 files changed

+199
-2
lines changed

3 files changed

+199
-2
lines changed

cmake/system_check.cmake

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,11 @@ endif()
3131

3232
# Fairly thorough detection of the target architecture. Add more cases if needed.
3333
if(CMAKE_CL_64 OR MINGW64)
34-
set(X86_64 1)
34+
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
35+
set(ARM64 1)
36+
else()
37+
set(X86_64 1)
38+
endif()
3539
elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
3640
set(X86 1)
3741
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc.*|power.*|Power.*")

common_arm64.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3333
#ifndef COMMON_ARM64
3434
#define COMMON_ARM64
3535

36+
#ifdef C_MSVC
37+
#include <intrin.h>
38+
#define MB __dmb(_ARM64_BARRIER_ISH)
39+
#define WMB __dmb(_ARM64_BARRIER_ISHST)
40+
#define RMB __dmb(_ARM64_BARRIER_ISHLD)
41+
#else
3642
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
3743
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
3844
#define RMB __asm__ __volatile__ ("dmb ishld" : : : "memory")
45+
#endif
3946

4047
#define INLINE inline
4148

@@ -53,6 +60,7 @@ static void __inline blas_lock(volatile BLASULONG *address){
5360
BLASULONG ret;
5461

5562
do {
63+
#ifndef C_MSVC
5664
__asm__ __volatile__(
5765
"mov x4, #1 \n\t"
5866
"sevl \n\t"
@@ -70,7 +78,10 @@ static void __inline blas_lock(volatile BLASULONG *address){
7078

7179

7280
);
73-
81+
#else
82+
while (*address) {YIELDING;}
83+
ret=InterlockedExchange64((volatile LONG64 *)(address), 1);
84+
#endif
7485

7586
} while (ret);
7687

@@ -80,13 +91,22 @@ static void __inline blas_lock(volatile BLASULONG *address){
8091

8192
#if !defined(OS_DARWIN) && !defined (OS_ANDROID)
8293
static __inline BLASULONG rpcc(void){
94+
#ifdef C_MSVC
95+
const int64_t pmccntr_el0 = (((3 & 1) << 14) | // op0
96+
((3 & 7) << 11) | // op1
97+
((9 & 15) << 7) | // crn
98+
((13 & 15) << 3) | // crm
99+
((0 & 7) << 0)); // op2
100+
return _ReadStatusReg(pmccntr_el0);
101+
#else
83102
BLASULONG ret = 0;
84103
blasint shift;
85104

86105
__asm__ __volatile__ ("isb; mrs %0,cntvct_el0":"=r"(ret));
87106
__asm__ __volatile__ ("mrs %0,cntfrq_el0; clz %w0, %w0":"=&r"(shift));
88107

89108
return ret << shift;
109+
#endif
90110
}
91111

92112
#define RPCC_DEFINED

kernel/arm64/KERNEL.generic

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
ifndef DSDOTKERNEL
2+
DSDOTKERNEL = ../generic/dot.c
3+
endif
4+
SGEMM_BETA = ../generic/gemm_beta.c
5+
DGEMM_BETA = ../generic/gemm_beta.c
6+
CGEMM_BETA = ../generic/zgemm_beta.c
7+
ZGEMM_BETA = ../generic/zgemm_beta.c
8+
9+
STRMMKERNEL = ../generic/trmmkernel_2x2.c
10+
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
11+
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
12+
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
13+
14+
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
15+
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
16+
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
17+
SGEMMONCOPYOBJ = sgemm_oncopy.o
18+
SGEMMOTCOPYOBJ = sgemm_otcopy.o
19+
20+
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
21+
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
22+
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
23+
DGEMMONCOPYOBJ = dgemm_oncopy.o
24+
DGEMMOTCOPYOBJ = dgemm_otcopy.o
25+
26+
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
27+
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
28+
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
29+
CGEMMONCOPYOBJ = cgemm_oncopy.o
30+
CGEMMOTCOPYOBJ = cgemm_otcopy.o
31+
32+
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
33+
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
34+
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
35+
ZGEMMONCOPYOBJ = zgemm_oncopy.o
36+
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
37+
38+
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
39+
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
40+
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
41+
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
42+
43+
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
44+
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
45+
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
46+
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
47+
48+
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
49+
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
50+
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
51+
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
52+
53+
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
54+
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
55+
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
56+
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
57+
58+
#TODO: CGEMM3MKERNEL should use 4x4 block sizes.
59+
CGEMM3MKERNEL = zgemm3m_kernel_8x4_sse3.S
60+
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_sse3.S
61+
62+
#Pure C for other kernels
63+
SAMAXKERNEL = ../arm/amax.c
64+
DAMAXKERNEL = ../arm/amax.c
65+
CAMAXKERNEL = ../arm/zamax.c
66+
ZAMAXKERNEL = ../arm/zamax.c
67+
68+
SAMINKERNEL = ../arm/amin.c
69+
DAMINKERNEL = ../arm/amin.c
70+
CAMINKERNEL = ../arm/zamin.c
71+
ZAMINKERNEL = ../arm/zamin.c
72+
73+
SMAXKERNEL = ../arm/max.c
74+
DMAXKERNEL = ../arm/max.c
75+
76+
SMINKERNEL = ../arm/min.c
77+
DMINKERNEL = ../arm/min.c
78+
79+
ISAMAXKERNEL = ../arm/iamax.c
80+
IDAMAXKERNEL = ../arm/iamax.c
81+
ICAMAXKERNEL = ../arm/izamax.c
82+
IZAMAXKERNEL = ../arm/izamax.c
83+
84+
ISAMINKERNEL = ../arm/iamin.c
85+
IDAMINKERNEL = ../arm/iamin.c
86+
ICAMINKERNEL = ../arm/izamin.c
87+
IZAMINKERNEL = ../arm/izamin.c
88+
89+
ISMAXKERNEL = ../arm/imax.c
90+
IDMAXKERNEL = ../arm/imax.c
91+
92+
ISMINKERNEL = ../arm/imin.c
93+
IDMINKERNEL = ../arm/imin.c
94+
95+
SASUMKERNEL = ../arm/asum.c
96+
DASUMKERNEL = ../arm/asum.c
97+
CASUMKERNEL = ../arm/zasum.c
98+
ZASUMKERNEL = ../arm/zasum.c
99+
100+
SSUMKERNEL = ../arm/sum.c
101+
DSUMKERNEL = ../arm/sum.c
102+
CSUMKERNEL = ../arm/zsum.c
103+
ZSUMKERNEL = ../arm/zsum.c
104+
105+
SAXPYKERNEL = ../arm/axpy.c
106+
DAXPYKERNEL = ../arm/axpy.c
107+
CAXPYKERNEL = ../arm/zaxpy.c
108+
ZAXPYKERNEL = ../arm/zaxpy.c
109+
110+
SCOPYKERNEL = ../arm/copy.c
111+
DCOPYKERNEL = ../arm/copy.c
112+
CCOPYKERNEL = ../arm/zcopy.c
113+
ZCOPYKERNEL = ../arm/zcopy.c
114+
115+
SDOTKERNEL = ../arm/dot.c
116+
DDOTKERNEL = ../arm/dot.c
117+
CDOTKERNEL = ../arm/zdot.c
118+
ZDOTKERNEL = ../arm/zdot.c
119+
120+
SNRM2KERNEL = ../arm/nrm2.c
121+
DNRM2KERNEL = ../arm/nrm2.c
122+
CNRM2KERNEL = ../arm/znrm2.c
123+
ZNRM2KERNEL = ../arm/znrm2.c
124+
125+
SROTKERNEL = ../arm/rot.c
126+
DROTKERNEL = ../arm/rot.c
127+
CROTKERNEL = ../arm/zrot.c
128+
ZROTKERNEL = ../arm/zrot.c
129+
130+
SSCALKERNEL = ../arm/scal.c
131+
DSCALKERNEL = ../arm/scal.c
132+
CSCALKERNEL = ../arm/zscal.c
133+
ZSCALKERNEL = ../arm/zscal.c
134+
135+
SSWAPKERNEL = ../arm/swap.c
136+
DSWAPKERNEL = ../arm/swap.c
137+
CSWAPKERNEL = ../arm/zswap.c
138+
ZSWAPKERNEL = ../arm/zswap.c
139+
140+
SGEMVNKERNEL = ../arm/gemv_n.c
141+
DGEMVNKERNEL = ../arm/gemv_n.c
142+
CGEMVNKERNEL = ../arm/zgemv_n.c
143+
ZGEMVNKERNEL = ../arm/zgemv_n.c
144+
145+
SGEMVTKERNEL = ../arm/gemv_t.c
146+
DGEMVTKERNEL = ../arm/gemv_t.c
147+
CGEMVTKERNEL = ../arm/zgemv_t.c
148+
ZGEMVTKERNEL = ../arm/zgemv_t.c
149+
150+
SSYMV_U_KERNEL = ../generic/symv_k.c
151+
SSYMV_L_KERNEL = ../generic/symv_k.c
152+
DSYMV_U_KERNEL = ../generic/symv_k.c
153+
DSYMV_L_KERNEL = ../generic/symv_k.c
154+
QSYMV_U_KERNEL = ../generic/symv_k.c
155+
QSYMV_L_KERNEL = ../generic/symv_k.c
156+
CSYMV_U_KERNEL = ../generic/zsymv_k.c
157+
CSYMV_L_KERNEL = ../generic/zsymv_k.c
158+
ZSYMV_U_KERNEL = ../generic/zsymv_k.c
159+
ZSYMV_L_KERNEL = ../generic/zsymv_k.c
160+
XSYMV_U_KERNEL = ../generic/zsymv_k.c
161+
XSYMV_L_KERNEL = ../generic/zsymv_k.c
162+
163+
ZHEMV_U_KERNEL = ../generic/zhemv_k.c
164+
ZHEMV_L_KERNEL = ../generic/zhemv_k.c
165+
166+
LSAME_KERNEL = ../generic/lsame.c
167+
SCABS_KERNEL = ../generic/cabs.c
168+
DCABS_KERNEL = ../generic/cabs.c
169+
QCABS_KERNEL = ../generic/cabs.c
170+
171+
#Dump kernel
172+
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
173+
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c

0 commit comments

Comments
 (0)