Skip to content

Commit c644e89

Browse files
pablosanjoseKristofferC
authored andcommitted
apply OpenBLAS_jll v0.3.23+4 patch (#53074)
Closes #53054 CC: @giordano @ViralBShah
1 parent a45cd5f commit c644e89

File tree

3 files changed

+269
-1
lines changed

3 files changed

+269
-1
lines changed

deps/openblas.mk

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,12 @@ $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/neoverse-generic-kernels.patch-applied: $(BUILDD
100100
patch -p1 -f < $(SRCDIR)/patches/neoverse-generic-kernels.patch
101101
echo 1 > $@
102102

103-
$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/neoverse-generic-kernels.patch-applied
103+
$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-m1-4003.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/neoverse-generic-kernels.patch-applied
104+
cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
105+
patch -p1 -f < $(SRCDIR)/patches/openblas-m1-4003.patch
106+
echo 1 > $@
107+
108+
$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-m1-4003.patch-applied
104109
echo 1 > $@
105110

106111
$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured

deps/patches/openblas-m1-4003.patch

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
From caa2945138f3c8a6f3f0dacbaf653c283e3cd2cb Mon Sep 17 00:00:00 2001
2+
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
3+
Date: Tue, 11 Apr 2023 00:04:09 +0200
4+
Subject: [PATCH] Support Apple A15/M2 cpus through the existing VORTEX target
5+
6+
---
7+
cpuid_arm64.c | 3 ++-
8+
1 file changed, 2 insertions(+), 1 deletion(-)
9+
10+
diff --git a/cpuid_arm64.c b/cpuid_arm64.c
11+
index 1080ea974..809f48e95 100644
12+
--- a/cpuid_arm64.c
13+
+++ b/cpuid_arm64.c
14+
@@ -268,7 +268,8 @@ int detect(void)
15+
#else
16+
#ifdef __APPLE__
17+
sysctlbyname("hw.cpufamily",&value,&length,NULL,0);
18+
- if (value ==131287967|| value == 458787763 ) return CPU_VORTEX;
19+
+ if (value ==131287967|| value == 458787763 ) return CPU_VORTEX; //A12/M1
20+
+ if (value == 3660830781) return CPU_VORTEX; //A15/M2
21+
#endif
22+
return CPU_ARMV8;
23+
#endif
24+
25+
From cda29633a30bf7ecbc64f85e4bcc6517ad954f1c Mon Sep 17 00:00:00 2001
26+
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
27+
Date: Thu, 13 Apr 2023 17:59:48 +0200
28+
Subject: [PATCH 1/8] move ALPHA_I out of register 18 (reserved on OSX)
29+
30+
---
31+
kernel/arm64/cgemm_kernel_8x4.S | 2 +-
32+
1 file changed, 1 insertion(+), 1 deletion(-)
33+
34+
diff --git a/kernel/arm64/cgemm_kernel_8x4.S b/kernel/arm64/cgemm_kernel_8x4.S
35+
index 24e08a646a..f100adc7af 100644
36+
--- a/kernel/arm64/cgemm_kernel_8x4.S
37+
+++ b/kernel/arm64/cgemm_kernel_8x4.S
38+
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39+
#define pCRow3 x15
40+
#define pA x16
41+
#define alphaR w17
42+
-#define alphaI w18
43+
+#define alphaI w19
44+
45+
#define alpha0_R s10
46+
#define alphaV0_R v10.s[0]
47+
48+
From c7bbad09adf8cdd2fa4b8709ea669e530a0136a4 Mon Sep 17 00:00:00 2001
49+
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
50+
Date: Thu, 13 Apr 2023 18:00:47 +0200
51+
Subject: [PATCH 2/8] Move ALPHA_I out of register 18 (reserved on OSX)
52+
53+
---
54+
kernel/arm64/cgemm_kernel_8x4_thunderx2t99.S | 2 +-
55+
1 file changed, 1 insertion(+), 1 deletion(-)
56+
57+
diff --git a/kernel/arm64/cgemm_kernel_8x4_thunderx2t99.S b/kernel/arm64/cgemm_kernel_8x4_thunderx2t99.S
58+
index 29a68ff227..2c63925be2 100644
59+
--- a/kernel/arm64/cgemm_kernel_8x4_thunderx2t99.S
60+
+++ b/kernel/arm64/cgemm_kernel_8x4_thunderx2t99.S
61+
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
62+
#define pCRow3 x15
63+
#define pA x16
64+
#define alphaR w17
65+
-#define alphaI w18
66+
+#define alphaI w19
67+
68+
#define alpha0_R s10
69+
#define alphaV0_R v10.s[0]
70+
71+
From 0b1acb0ba3aa327fee65bc6bcf596080dfc39f4b Mon Sep 17 00:00:00 2001
72+
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
73+
Date: Thu, 13 Apr 2023 18:03:35 +0200
74+
Subject: [PATCH 3/8] Move ALPHA_I out of register 18 (reserved on OSX)
75+
76+
---
77+
kernel/arm64/ctrmm_kernel_8x4.S | 8 ++++----
78+
1 file changed, 4 insertions(+), 4 deletions(-)
79+
80+
diff --git a/kernel/arm64/ctrmm_kernel_8x4.S b/kernel/arm64/ctrmm_kernel_8x4.S
81+
index 5c08273975..e8f1d8cf30 100644
82+
--- a/kernel/arm64/ctrmm_kernel_8x4.S
83+
+++ b/kernel/arm64/ctrmm_kernel_8x4.S
84+
@@ -49,10 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
85+
#define pCRow3 x15
86+
#define pA x16
87+
#define alphaR w17
88+
-#define alphaI w18
89+
-#define temp x19
90+
-#define tempOffset x20
91+
-#define tempK x21
92+
+#define alphaI w19
93+
+#define temp x20
94+
+#define tempOffset x21
95+
+#define tempK x22
96+
97+
#define alpha0_R s10
98+
#define alphaV0_R v10.s[0]
99+
100+
From 108a21e47a754032a9fb5477afcb76c6c158a146 Mon Sep 17 00:00:00 2001
101+
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
102+
Date: Thu, 13 Apr 2023 18:05:14 +0200
103+
Subject: [PATCH 4/8] Move ALPHA out of register 18 (reserved on OSX)
104+
105+
---
106+
kernel/arm64/sgemm_kernel_sve_v2x8.S | 4 ++--
107+
1 file changed, 2 insertions(+), 2 deletions(-)
108+
109+
diff --git a/kernel/arm64/sgemm_kernel_sve_v2x8.S b/kernel/arm64/sgemm_kernel_sve_v2x8.S
110+
index c969ed4db4..60e1f347b8 100644
111+
--- a/kernel/arm64/sgemm_kernel_sve_v2x8.S
112+
+++ b/kernel/arm64/sgemm_kernel_sve_v2x8.S
113+
@@ -55,8 +55,8 @@ With this approach, we can reuse sgemm_n|tcopy_sve_v1.c packing functions. */
114+
#define lanes x15
115+
#define pA1 x16
116+
#define pA2 x17
117+
-#define alpha w18
118+
-#define vec_len x19
119+
+#define alpha w19
120+
+#define vec_len x20
121+
#define vec_lenx2 x20
122+
123+
#define alpha0 s10
124+
125+
From 3727672a74c18938230c3a2db012a5693688bfd6 Mon Sep 17 00:00:00 2001
126+
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
127+
Date: Thu, 13 Apr 2023 18:07:52 +0200
128+
Subject: [PATCH 5/8] Improve workaround and keep compilers from optimizing it
129+
out
130+
131+
---
132+
kernel/arm64/dznrm2_thunderx2t99.c | 6 ++++--
133+
1 file changed, 4 insertions(+), 2 deletions(-)
134+
135+
diff --git a/kernel/arm64/dznrm2_thunderx2t99.c b/kernel/arm64/dznrm2_thunderx2t99.c
136+
index e342b0b63f..0bd274b3f1 100644
137+
--- a/kernel/arm64/dznrm2_thunderx2t99.c
138+
+++ b/kernel/arm64/dznrm2_thunderx2t99.c
139+
@@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
140+
141+
142+
#include "common.h"
143+
-
144+
+#include <float.h>
145+
#include <arm_neon.h>
146+
147+
#if defined(SMP)
148+
@@ -344,6 +344,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
149+
FLOAT dummy_alpha[2];
150+
#endif
151+
FLOAT ssq, scale;
152+
+ volatile FLOAT sca;
153+
154+
if (n <= 0 || inc_x <= 0) return 0.0;
155+
156+
@@ -404,7 +405,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
157+
#else
158+
nrm2_compute(n, x, inc_x, &ssq, &scale);
159+
#endif
160+
- if (fabs(scale) <1.e-300) return 0.;
161+
+ sca = fabs(scale);
162+
+ if (sca < DBL_MIN) return 0.;
163+
ssq = sqrt(ssq) * scale;
164+
165+
return ssq;
166+
167+
From f096a339e4a22f4bc6dc454640e5d4007b07368b Mon Sep 17 00:00:00 2001
168+
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
169+
Date: Thu, 13 Apr 2023 18:16:09 +0200
170+
Subject: [PATCH 6/8] Use long value fields for cpu ident on OSX
171+
172+
---
173+
cpuid_arm64.c | 6 +++---
174+
1 file changed, 3 insertions(+), 3 deletions(-)
175+
176+
diff --git a/cpuid_arm64.c b/cpuid_arm64.c
177+
index 809f48e95a..e586f9a3c2 100644
178+
--- a/cpuid_arm64.c
179+
+++ b/cpuid_arm64.c
180+
@@ -267,9 +267,9 @@ int detect(void)
181+
}
182+
#else
183+
#ifdef __APPLE__
184+
- sysctlbyname("hw.cpufamily",&value,&length,NULL,0);
185+
- if (value ==131287967|| value == 458787763 ) return CPU_VORTEX; //A12/M1
186+
- if (value == 3660830781) return CPU_VORTEX; //A15/M2
187+
+ sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0);
188+
+ if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1
189+
+ if (value64 == 3660830781) return CPU_VORTEX; //A15/M2
190+
#endif
191+
return CPU_ARMV8;
192+
#endif
193+
194+
From 8be68fa7f4edfa0c65949faf67f8feea2c7f0f43 Mon Sep 17 00:00:00 2001
195+
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
196+
Date: Sat, 15 Apr 2023 12:02:39 +0200
197+
Subject: [PATCH 7/8] move declaration of sca to really keep the compiler from
198+
throwing it out (for now)
199+
200+
---
201+
kernel/arm64/dznrm2_thunderx2t99.c | 3 +--
202+
1 file changed, 1 insertion(+), 2 deletions(-)
203+
204+
diff --git a/kernel/arm64/dznrm2_thunderx2t99.c b/kernel/arm64/dznrm2_thunderx2t99.c
205+
index 0bd274b3f1..6077c85dd1 100644
206+
--- a/kernel/arm64/dznrm2_thunderx2t99.c
207+
+++ b/kernel/arm64/dznrm2_thunderx2t99.c
208+
@@ -344,7 +344,6 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
209+
FLOAT dummy_alpha[2];
210+
#endif
211+
FLOAT ssq, scale;
212+
- volatile FLOAT sca;
213+
214+
if (n <= 0 || inc_x <= 0) return 0.0;
215+
216+
@@ -405,7 +404,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
217+
#else
218+
nrm2_compute(n, x, inc_x, &ssq, &scale);
219+
#endif
220+
- sca = fabs(scale);
221+
+ volatile FLOAT sca = fabs(scale);
222+
if (sca < DBL_MIN) return 0.;
223+
ssq = sqrt(ssq) * scale;
224+
225+
226+
From 44164e3a3d7f5c956728596b9f88d43cad0a8c14 Mon Sep 17 00:00:00 2001
227+
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
228+
Date: Mon, 17 Apr 2023 14:23:13 +0200
229+
Subject: [PATCH 8/8] revert "move alpha out of register 18" (out of PR scope,
230+
no SVE on Apple hw)
231+
232+
---
233+
kernel/arm64/sgemm_kernel_sve_v2x8.S | 4 ++--
234+
1 file changed, 2 insertions(+), 2 deletions(-)
235+
236+
diff --git a/kernel/arm64/sgemm_kernel_sve_v2x8.S b/kernel/arm64/sgemm_kernel_sve_v2x8.S
237+
index 60e1f347b8..c969ed4db4 100644
238+
--- a/kernel/arm64/sgemm_kernel_sve_v2x8.S
239+
+++ b/kernel/arm64/sgemm_kernel_sve_v2x8.S
240+
@@ -55,8 +55,8 @@ With this approach, we can reuse sgemm_n|tcopy_sve_v1.c packing functions. */
241+
#define lanes x15
242+
#define pA1 x16
243+
#define pA2 x17
244+
-#define alpha w19
245+
-#define vec_len x20
246+
+#define alpha w18
247+
+#define vec_len x19
248+
#define vec_lenx2 x20
249+
250+
#define alpha0 s10

stdlib/LinearAlgebra/test/blas.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,19 @@ Random.seed!(100)
126126
@test BLAS.iamax(b) == findmax(fabs, b)[2] * (step(ind) >= 0)
127127
end
128128
end
129+
@testset "deterministic mul!" begin
130+
# mul! should be deterministic, see #53054
131+
function tester_53054()
132+
C = ComplexF32
133+
mat = zeros(C, 1, 1)
134+
for _ in 1:100
135+
v = [C(1-0.2im) C(2+0.3im)]
136+
mul!(mat, v, v', C(1+im), 1)
137+
end
138+
return mat
139+
end
140+
@test allequal(tester_53054() for _ in 1:10000)
141+
end
129142
@testset "scal" begin
130143
α = rand(elty)
131144
a = rand(elty,n)

0 commit comments

Comments
 (0)