Skip to content

Commit 9562d09

Browse files
author
Andy Ferris
committed
Small fixes and benchmark for README
Probably should generate a full benchmark when I have time
1 parent e007d7c commit 9562d09

File tree

4 files changed

+159
-143
lines changed

4 files changed

+159
-143
lines changed

README.md

Lines changed: 55 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -29,30 +29,63 @@ than `Base.Array`. See this sample benchmark (or see the full results [here](htt
2929

3030
```
3131
=====================================
32-
Benchmarks for 3×3 matrices
32+
Benchmarks for 3×3 matrices
3333
=====================================
3434
35-
Matrix multiplication
36-
---------------------
37-
Array -> 3.973188 seconds (74.07 M allocations: 6.623 GB, 12.92% gc time)
38-
SArray -> 0.326989 seconds (5 allocations: 240 bytes)
39-
MArray -> 2.248258 seconds (37.04 M allocations: 2.759 GB, 14.06% gc time)
40-
41-
Matrix multiplication (mutating)
42-
--------------------------------
43-
Array -> 2.237091 seconds (6 allocations: 480 bytes)
44-
MArray -> 0.795372 seconds (6 allocations: 320 bytes)
45-
46-
Matrix addition
47-
---------------
48-
Array -> 2.610709 seconds (44.44 M allocations: 3.974 GB, 11.81% gc time)
49-
SArray -> 0.073024 seconds (5 allocations: 240 bytes)
50-
MArray -> 0.896849 seconds (22.22 M allocations: 1.656 GB, 21.33% gc time)
51-
52-
Matrix addition (mutating)
53-
--------------------------
54-
Array -> 0.872791 seconds (6 allocations: 480 bytes)
55-
MArray -> 0.145895 seconds (5 allocations: 240 bytes)
35+
Matrix multiplication (8.2x speedup)
36+
------------------------------------
37+
Array -> 2.024568 seconds (74.07 M allocations: 6.623 GB, 9.37% gc time)
38+
SArray -> 0.247364 seconds (5 allocations: 240 bytes)
39+
MArray -> 1.603798 seconds (37.04 M allocations: 2.759 GB, 13.61% gc time)
40+
SizedArray -> 2.223853 seconds (74.07 M allocations: 6.071 GB, 10.05% gc time)
41+
42+
Matrix multiplication (mutating) (3.1x speedup)
43+
-----------------------------------------------
44+
Array -> 1.360940 seconds (6 allocations: 480 bytes)
45+
MArray -> 0.443528 seconds (7 allocations: 400 bytes)
46+
SizedArray -> 0.681896 seconds (7 allocations: 416 bytes)
47+
48+
Matrix addition (45x speedup)
49+
-----------------------------
50+
Array -> 1.458899 seconds (44.44 M allocations: 3.974 GB, 7.96% gc time)
51+
SArray -> 0.032043 seconds (5 allocations: 240 bytes)
52+
MArray -> 0.682318 seconds (22.22 M allocations: 1.656 GB, 18.70% gc time)
53+
SizedArray -> 1.111785 seconds (44.44 M allocations: 3.643 GB, 12.02% gc time)
54+
55+
Matrix addition (mutating) (5.1x speedup)
56+
-----------------------------------------
57+
Array -> 0.493796 seconds (5 allocations: 320 bytes)
58+
MArray -> 0.096303 seconds (5 allocations: 240 bytes)
59+
SizedArray -> 0.135803 seconds (6 allocations: 336 bytes)
60+
61+
Matrix determinant (170x speedup)
62+
---------------------------------
63+
Array -> 15.291557 seconds (222.22 M allocations: 12.694 GB, 16.97% gc time)
64+
SArray -> 0.094409 seconds (4 allocations: 160 bytes)
65+
MArray -> 0.089569 seconds (4 allocations: 160 bytes)
66+
SizedArray -> 0.114134 seconds (4 allocations: 160 bytes)
67+
68+
Matrix inverse (125x speedup)
69+
-----------------------------
70+
Array -> 47.704314 seconds (407.41 M allocations: 82.232 GB, 23.67% gc time)
71+
SArray -> 0.379657 seconds (4 allocations: 160 bytes)
72+
MArray -> 1.294672 seconds (37.04 M allocations: 2.759 GB, 19.77% gc time)
73+
SizedArray -> 2.136363 seconds (74.07 M allocations: 6.071 GB, 8.61% gc time)
74+
75+
Matrix symmetric eigenvalue (105x speedup)
76+
------------------------------------------
77+
Array -> 418.304283 seconds (740.74 M allocations: 89.407 GB, 1.22% gc time)
78+
SArray -> 3.963118 seconds (5 allocations: 256 bytes)
79+
MArray -> 3.964029 seconds (6 allocations: 272 bytes)
80+
SizedArray -> 4.028497 seconds (6 allocations: 208 bytes)
81+
82+
Matrix Cholesky (23.6x speedup)
83+
-------------------------------
84+
Array -> 8.139431 seconds (222.22 M allocations: 9.934 GB, 6.28% gc time)
85+
SArray -> 0.344283 seconds (5 allocations: 256 bytes)
86+
MArray -> 0.812532 seconds (37.04 M allocations: 2.759 GB, 6.19% gc time)
87+
SizedArray -> 2.225999 seconds (74.07 M allocations: 6.071 GB, 11.41% gc time)
88+
5689
```
5790

5891
(Run with `julia -O3` for even faster SIMD code with immutable static arrays!)

perf/benchmark2.jl

Lines changed: 97 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@ M_g = div(2*10^8, N^2)
1616
# Size
1717

1818
A = rand(Float64,N,N)
19+
A = A*A'
1920
As = SMatrix{N,N}(A)
2021
Am = MMatrix{N,N}(A)
22+
Az = Size(N,N)(copy(A))
2123
@static if fsa
2224
Af = Mat(ntuple(j -> ntuple(i->A[i,j], N), N)) # there is a bug in FixedSizeArrays Mat constructor (13 July 2016)
2325
end
@@ -36,9 +38,19 @@ if !isdefined(:f_mut_marray) || !isdefined(:benchmark_suite) || benchmark_suite
3638
@generated f_blas_marray(n::Integer, A) = :(@inbounds (C = similar(A); C[:] = A[:]; tmp = similar(A); for i = 1:n; StaticArrays.A_mul_B_blas!(tmp, C, A); C.data = tmp.data; end; return C))
3739

3840
@generated g(n::Integer, A) = :(@inbounds (C = A; for i = 1:n; C = C + A; end; return C))
39-
@generated g_mut(n::Integer, A) = :(@inbounds (C = similar(A); C[:] = A[:]; for i = 1:n; @inbounds map!(+, C, C, A); end; return C))
41+
@generated g_mut(n::Integer, A) = :(@inbounds (C = copy(A); for i = 1:n; @inbounds map!(+, C, C, A); end; return C))
4042
@generated g_via_sarray{M}(n::Integer, A::MMatrix{M,M}) = :(@inbounds (C = similar(A); C[:] = A[:]; for i = 1:n; C = MMatrix{M,M}(SMatrix{M,M}(C) + SMatrix{M,M}(A)); end; return C))
4143

44+
# Non-inlined forwarding wrapper: hands `x` to `det` and returns its result.
@noinline function _det(x)
    return det(x)
end
45+
# Non-inlined forwarding wrapper: hands `x` to `inv` and returns its result.
@noinline function _inv(x)
    return inv(x)
end
46+
# Non-inlined forwarding wrapper: hands `x` to `eig` and returns its result.
@noinline function _eig(x)
    return eig(x)
end
47+
# Non-inlined forwarding wrapper: hands `x` to `chol` and returns its result.
@noinline function _chol(x)
    return chol(x)
end
48+
49+
# Benchmark driver: call `_det(A)` `n` times, discarding each result.
# Returns `nothing`.
function f_det(n::Int, A)
    for iter in 1:n
        _det(A)
    end
    return nothing
end
50+
# Benchmark driver: call `_inv(A)` `n` times, discarding each result.
# Returns `nothing`.
function f_inv(n::Int, A)
    for iter in 1:n
        _inv(A)
    end
    return nothing
end
51+
# Benchmark driver: call `_eig(A)` `n` times, discarding each result.
# Returns `nothing`.
function f_eig(n::Int, A)
    for iter in 1:n
        _eig(A)
    end
    return nothing
end
52+
# Benchmark driver: call `_chol(A)` `n` times, discarding each result.
# Returns `nothing`.
function f_chol(n::Int, A)
    for iter in 1:n
        _chol(A)
    end
    return nothing
end
53+
4254
# Notes: - A[:] = B[:] is allocating in Base, unlike `map!`
4355
# - Also, the same goes for Base's implementation of broadcast!(f, A, B, C) (but map! is OK).
4456
# - I really need to implement copy() in StaticArrays... (and maybe a special instance of setindex!(C, :))
@@ -122,11 +134,14 @@ end
122134
# Warmup and some checks
123135
Cs = f(2, As)
124136
Cm = f(2, Am)
137+
Cz = f(2, Az)
125138

126139
Cs::SMatrix
127140
if N <= 4; @assert Cs ≈ C; end
128141
Cm::MMatrix
129142
if N <= 4; @assert Cm ≈ C; end
143+
Cz::SizedMatrix
144+
if N <= 4; @assert Cz ≈ C; end
130145

131146
@static if fsa
132147
@static if all_methods
@@ -149,6 +164,10 @@ Cm_mut = f_mut_marray(2, Am)
149164
Cm_mut::MMatrix
150165
if N <= 4; @assert Cm_mut ≈ C; end
151166

167+
Cz_mut = f_mut_array(2, Az)
168+
Cz_mut::SizedMatrix
169+
if N <= 4; @assert Cz_mut ≈ C; end
170+
152171
@static if all_methods
153172
println()
154173
print("A_mul_B!(MMatrix, MMatrix) compilation time (unrolled):")
@@ -196,13 +215,19 @@ if N <= 4; @assert C_mut ≈ C; end
196215
Cs = g(2, As)
197216
Cm = g(2, Am)
198217
Cm_mut = g_mut(2, Am)
218+
Cz = g(2, Az)
219+
Cz_mut = g_mut(2, Az)
199220

200221
Cs::SMatrix
201222
if N <= 4; @assert Cs == C; end
202223
Cm::MMatrix
203224
if N <= 4; @assert Cm == C; end
204225
Cm_mut::MMatrix
205226
if N <= 4; @assert Cm_mut == C; end
227+
Cz::SizedMatrix
228+
if N <= 4; @assert Cz == C; end
229+
Cz_mut::SizedMatrix
230+
if N <= 4; @assert Cz_mut == C; end
206231

207232
@static if all_methods
208233
Cm_via_sarray = g_via_sarray(2, Am)
@@ -216,6 +241,29 @@ end
216241
if N <= 4; @assert Cf == C; end
217242
end
218243

244+
if N <= 3
245+
# det, eig etc
246+
C = f_det(2, A)
247+
Cs = f_det(2, As)
248+
Cm = f_det(2, Am)
249+
Cz = f_det(2, Az)
250+
251+
C = f_inv(2, A)
252+
Cs = f_inv(2, As)
253+
Cm = f_inv(2, Am)
254+
Cz = f_inv(2, Az)
255+
256+
C = f_eig(2, Symmetric(A))
257+
Cs = f_eig(2, Symmetric(As))
258+
Cm = f_eig(2, Symmetric(Am))
259+
Cz = f_eig(2, Symmetric(Az))
260+
261+
C = f_chol(2, Symmetric(A))
262+
Cs = f_chol(2, Symmetric(As))
263+
Cm = f_chol(2, Symmetric(Am))
264+
Cz = f_chol(2, Symmetric(Az))
265+
end
266+
219267
println()
220268

221269
# Do the performance tests
@@ -229,6 +277,7 @@ begin
229277
end
230278
print("SArray ->"); @time f(M_f, As)
231279
print("MArray ->"); @time f(M_f, Am)
280+
print("SizedArray ->"); @time f(M_f, Az)
232281
@static if all_methods
233282
print("SArray (unrolled) ->"); @time f_unrolled(M_f, As)
234283
print("SArray (chunks) ->"); @time f_unrolled_chunks(M_f, As)
@@ -246,6 +295,7 @@ println("--------------------------------")
246295
begin
247296
print("Array ->"); @time f_mut_array(M_f, A)
248297
print("MArray ->"); @time f_mut_marray(M_f, Am)
298+
print("SizedArray ->"); @time f_mut_array(M_f, Az)
249299
@static if all_methods
250300
print("MArray (unrolled) ->"); @time f_mut_unrolled(M_f, Am)
251301
print("MArray (chunks) ->"); @time f_mut_chunks(M_f, Am)
@@ -263,6 +313,7 @@ begin
263313
end
264314
print("SArray ->"); @time g(M_g, As)
265315
print("MArray ->"); @time g(M_g, Am)
316+
print("SizedArray ->"); @time g(M_g, Az)
266317
@static if all_methods
267318
print("MArray (via SArray) ->"); @time g_via_sarray(M_g, Am)
268319
end
@@ -272,7 +323,50 @@ println()
272323
println("Matrix addition (mutating)")
273324
println("--------------------------")
274325
begin
275-
print("Array ->"); @time g_mut(M_g, A) # broadcast! seems to be broken!
276-
print("MArray ->"); @time g_mut(M_g, Am)
326+
print("Array ->"); @time g_mut(M_g, A) # broadcast! seems to be broken!
327+
print("MArray ->"); @time g_mut(M_g, Am)
328+
print("SizedArray ->"); @time g_mut(M_g, Az)
277329
end
278330
println()
331+
332+
if N <= 3
333+
println("Matrix determinant")
334+
println("------------------")
335+
begin
336+
print("Array ->"); @time f_det(M_f, A)
337+
print("SArray ->"); @time f_det(M_f, As)
338+
print("MArray ->"); @time f_det(M_f, Am)
339+
print("SizedArray ->"); @time f_det(M_f, Az)
340+
end
341+
println()
342+
343+
println("Matrix inverse")
344+
println("--------------")
345+
begin
346+
print("Array ->"); @time f_inv(M_f, A)
347+
print("SArray ->"); @time f_inv(M_f, As)
348+
print("MArray ->"); @time f_inv(M_f, Am)
349+
print("SizedArray ->"); @time f_inv(M_f, Az)
350+
end
351+
println()
352+
353+
println("Matrix symmetric eigenvalue")
354+
println("---------------------------")
355+
begin
356+
print("Array ->"); @time f_eig(M_f, Symmetric(A))
357+
print("SArray ->"); @time f_eig(M_f, Symmetric(As))
358+
print("MArray ->"); @time f_eig(M_f, Symmetric(Am))
359+
print("SizedArray ->"); @time f_eig(M_f, Symmetric(Az))
360+
end
361+
println()
362+
363+
println("Matrix Cholesky")
364+
println("---------------")
365+
begin
366+
print("Array ->"); @time f_chol(M_f, Symmetric(A))
367+
print("SArray ->"); @time f_chol(M_f, Symmetric(As))
368+
print("MArray ->"); @time f_chol(M_f, Symmetric(Am))
369+
print("SizedArray ->"); @time f_chol(M_f, Symmetric(Az))
370+
end
371+
println()
372+
end #if N <= 3

src/cholesky.jl

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,12 @@
55
end
66

77
@inline function Base.chol{T<:Real, SM <: StaticMatrix}(A::Base.LinAlg.RealHermSymComplexHerm{T,SM})
8-
ishermitian(A) || Base.LinAlg.non_hermitian_error("chol")
9-
_chol(Size(A), A)
10-
end
11-
12-
@inline function Base.chol{SM<:StaticMatrix}(A::Symmetric{SM})
13-
eltype(A) <: Real && (ishermitian(A) || Base.LinAlg.non_hermitian_error("chol"))
14-
_chol(Size(A), A)
8+
_chol(Size(A), A.data)
159
end
10+
#=
11+
@inline function Base.chol{T<:Real,SM<:StaticMatrix}(A::Symmetric{T,SM})
12+
_chol(Size(A), A.data)
13+
end=#
1614

1715
@generated function _chol(::Size{(1,1)}, A::StaticMatrix)
1816
@assert size(A) == (1,1)

0 commit comments

Comments
 (0)