diff --git a/.gitignore b/.gitignore index 0127a75f..189932f2 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,14 @@ /docs/build/ /docs/Manifest.toml -/docs/src/assets/themes -/docs/src/assets/favicon.ico -/docs/src/assets/logo.svg -/docs/src/assets/logo-dark.svg + +# vitepress +/docs_vitepress/build/ +/docs_vitepress/final_site/ +/docs_vitepress/node_modules/ +/docs_vitepress/package-lock.json + +# MAC +.DS_Store +.AppleDouble +.LSOverride \ No newline at end of file diff --git a/docs_vitepress/Manifest.toml b/docs_vitepress/Manifest.toml new file mode 100644 index 00000000..d2f847cb --- /dev/null +++ b/docs_vitepress/Manifest.toml @@ -0,0 +1,1655 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.11.1" +manifest_format = "2.0" +project_hash = "e57c805e7244fbca50c8975b13217e4ce670267e" + +[[deps.ANSIColoredPrinters]] +git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c" +uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9" +version = "0.0.1" + +[[deps.AbstractFFTs]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "d92ad398961a3ed262d8bf04a1a2b8340f915fef" +uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" +version = "1.5.0" +weakdeps = ["ChainRulesCore", "Test"] + + [deps.AbstractFFTs.extensions] + AbstractFFTsChainRulesCoreExt = "ChainRulesCore" + AbstractFFTsTestExt = "Test" + +[[deps.AbstractTrees]] +git-tree-sha1 = "2d9c9a55f9c93e8887ad391fbae72f8ef55e1177" +uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +version = "0.4.5" + +[[deps.Adapt]] +deps = ["LinearAlgebra", "Requires"] +git-tree-sha1 = "50c3c56a52972d78e8be9fd135bfb91c9574c140" +uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +version = "4.1.1" +weakdeps = ["StaticArrays"] + + [deps.Adapt.extensions] + AdaptStaticArraysExt = "StaticArrays" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.2" + +[[deps.ArnoldiMethod]] +deps = ["LinearAlgebra", "Random", "StaticArrays"] +git-tree-sha1 = "d57bd3762d308bded22c3b82d033bff85f6195c6" 
+uuid = "ec485272-7323-5ecc-a04f-4719b315124d" +version = "0.4.0" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.11.0" + +[[deps.AxisAlgorithms]] +deps = ["LinearAlgebra", "Random", "SparseArrays", "WoodburyMatrices"] +git-tree-sha1 = "01b8ccb13d68535d73d2b0c23e39bd23155fb712" +uuid = "13072b0f-2c55-5437-9ae7-d433b7a33950" +version = "1.1.0" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" + +[[deps.BenchmarkTools]] +deps = ["JSON", "Logging", "Printf", "Profile", "Statistics", "UUIDs"] +git-tree-sha1 = "f1dff6729bc61f4d49e140da1af55dcd1ac97b2f" +uuid = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +version = "1.5.0" + +[[deps.BitFlags]] +git-tree-sha1 = "0691e34b3bb8be9307330f88d1a3c3f25466c24d" +uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35" +version = "0.1.9" + +[[deps.Bzip2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "8873e196c2eb87962a2048b3b8e08946535864a1" +uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" +version = "1.0.8+2" + +[[deps.CEnum]] +git-tree-sha1 = "389ad5c84de1ae7cf0e28e381131c98ea87d54fc" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.5.0" + +[[deps.Cairo_jll]] +deps = ["Artifacts", "Bzip2_jll", "CompilerSupportLibraries_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "009060c9a6168704143100f36ab08f06c2af4642" +uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" +version = "1.18.2+1" + +[[deps.ChainRules]] +deps = ["Adapt", "ChainRulesCore", "Compat", "Distributed", "GPUArraysCore", "IrrationalConstants", "LinearAlgebra", "Random", "RealDot", "SparseArrays", "SparseInverseSubset", "Statistics", "StructArrays", "SuiteSparse"] +git-tree-sha1 = "be227d253d132a6d57f9ccf5f67c0fb6488afd87" +uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2" +version = "1.71.0" + +[[deps.ChainRulesCore]] +deps = ["Compat", 
"LinearAlgebra"] +git-tree-sha1 = "3e4b134270b372f2ed4d4d0e936aabaefc1802bc" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "1.25.0" +weakdeps = ["SparseArrays"] + + [deps.ChainRulesCore.extensions] + ChainRulesCoreSparseArraysExt = "SparseArrays" + +[[deps.CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "bce6804e5e6044c6daab27bb533d1295e4a2e759" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.6" + +[[deps.ColorSchemes]] +deps = ["ColorTypes", "ColorVectorSpace", "Colors", "FixedPointNumbers", "PrecompileTools", "Random"] +git-tree-sha1 = "13951eb68769ad1cd460cdb2e64e5e95f1bf123d" +uuid = "35d6a980-a343-548e-a6ea-1d62b119f2f4" +version = "3.27.0" + +[[deps.ColorTypes]] +deps = ["FixedPointNumbers", "Random"] +git-tree-sha1 = "b10d0b65641d57b8b4d5e234446582de5047050d" +uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" +version = "0.11.5" + +[[deps.ColorVectorSpace]] +deps = ["ColorTypes", "FixedPointNumbers", "LinearAlgebra", "Requires", "Statistics", "TensorCore"] +git-tree-sha1 = "a1f44953f2382ebb937d60dafbe2deea4bd23249" +uuid = "c3611d14-8923-5661-9e6a-0046d554d3a4" +version = "0.10.0" +weakdeps = ["SpecialFunctions"] + + [deps.ColorVectorSpace.extensions] + SpecialFunctionsExt = "SpecialFunctions" + +[[deps.Colors]] +deps = ["ColorTypes", "FixedPointNumbers", "Reexport"] +git-tree-sha1 = "362a287c3aa50601b0bc359053d5c2468f0e7ce0" +uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" +version = "0.12.11" + +[[deps.CommonSubexpressions]] +deps = ["MacroTools"] +git-tree-sha1 = "cda2cfaebb4be89c9084adaca7dd7333369715c5" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.1" + +[[deps.Compat]] +deps = ["TOML", "UUIDs"] +git-tree-sha1 = "8ae8d32e09f0dcf42a36b90d4e17f5dd2e4c4215" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "4.16.0" +weakdeps = ["Dates", "LinearAlgebra"] + + [deps.Compat.extensions] + CompatLinearAlgebraExt = "LinearAlgebra" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", 
"Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "1.1.1+0" + +[[deps.ConcurrentUtilities]] +deps = ["Serialization", "Sockets"] +git-tree-sha1 = "ea32b83ca4fefa1768dc84e504cc0a94fb1ab8d1" +uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" +version = "2.4.2" + +[[deps.ConstructionBase]] +git-tree-sha1 = "76219f1ed5771adbb096743bff43fb5fdd4c1157" +uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" +version = "1.5.8" + + [deps.ConstructionBase.extensions] + ConstructionBaseIntervalSetsExt = "IntervalSets" + ConstructionBaseLinearAlgebraExt = "LinearAlgebra" + ConstructionBaseStaticArraysExt = "StaticArrays" + + [deps.ConstructionBase.weakdeps] + IntervalSets = "8197267c-284f-5f27-9208-e0e47529a953" + LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[[deps.Contour]] +git-tree-sha1 = "439e35b0b36e2e5881738abc8857bd92ad6ff9a8" +uuid = "d38c429a-6771-53c6-b99e-75d170b6e991" +version = "0.6.3" + +[[deps.DataAPI]] +git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.16.0" + +[[deps.DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "1d0a14036acb104d9e89698bd408f63ab58cdc82" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.20" + +[[deps.DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +version = "1.11.0" + +[[deps.Dbus_jll]] +deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl"] +git-tree-sha1 = "fc173b380865f70627d7dd1190dc2fce6cc105af" +uuid = "ee1fde0b-3d02-5ea6-8484-8dfef6360eab" +version = "1.14.10+0" + +[[deps.DelimitedFiles]] +deps = ["Mmap"] +git-tree-sha1 = "9e2f36d3c96a820c678f2f1f1782582fcf685bae" +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" +version = "1.9.1" + 
+[[deps.DiffResults]] +deps = ["StaticArraysCore"] +git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.1.0" + +[[deps.DiffRules]] +deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.15.1" + +[[deps.Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" +version = "1.11.0" + +[[deps.DocStringExtensions]] +deps = ["LibGit2"] +git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.9.3" + +[[deps.Documenter]] +deps = ["ANSIColoredPrinters", "AbstractTrees", "Base64", "CodecZlib", "Dates", "DocStringExtensions", "Downloads", "Git", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "MarkdownAST", "Pkg", "PrecompileTools", "REPL", "RegistryInstances", "SHA", "TOML", "Test", "Unicode"] +git-tree-sha1 = "5a1ee886566f2fa9318df1273d8b778b9d42712d" +uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +version = "1.7.0" + +[[deps.DocumenterTools]] +deps = ["AbstractTrees", "Base64", "DocStringExtensions", "Documenter", "FileWatching", "Git", "Gumbo", "LibGit2", "OpenSSH_jll", "Sass"] +git-tree-sha1 = "1eef850e88afab219e555678868d83aa901a360b" +uuid = "35a29f4d-8980-5a13-9543-d66fff28ecb8" +version = "0.1.20" + +[[deps.DocumenterVitepress]] +deps = ["ANSIColoredPrinters", "Base64", "DocStringExtensions", "Documenter", "IOCapture", "Markdown", "NodeJS_20_jll", "REPL"] +git-tree-sha1 = "aa210b5039870d3ad181877d99fd37618e94d29c" +uuid = "4710194d-e776-4893-9690-8d956a29c365" +version = "0.1.3" + + [deps.DocumenterVitepress.extensions] + DocumenterVitepressDocumenterCitationsExt = "DocumenterCitations" + + [deps.DocumenterVitepress.weakdeps] + DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244" + 
+[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.EarCut_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "e3290f2d49e661fbd94046d7e3726ffcb2d41053" +uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5" +version = "2.2.4+0" + +[[deps.EpollShim_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "8e9441ee83492030ace98f9789a654a6d0b1f643" +uuid = "2702e6a9-849d-5ed8-8c21-79e8b8f9ee43" +version = "0.0.20230411+0" + +[[deps.ExceptionUnwrapping]] +deps = ["Test"] +git-tree-sha1 = "dcb08a0d93ec0b1cdc4af184b26b591e9695423a" +uuid = "460bff9d-24e4-43bc-9d9f-a8973cb893f4" +version = "0.1.10" + +[[deps.Expat_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "1c6317308b9dc757616f0b5cb379db10494443a7" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.6.2+0" + +[[deps.ExprTools]] +git-tree-sha1 = "27415f162e6028e81c72b82ef756bf321213b6ec" +uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" +version = "0.1.10" + +[[deps.Extents]] +git-tree-sha1 = "81023caa0021a41712685887db1fc03db26f41f5" +uuid = "411431e0-e8b7-467b-b5e0-f676ba4f2910" +version = "0.1.4" + +[[deps.FFMPEG]] +deps = ["FFMPEG_jll"] +git-tree-sha1 = "53ebe7511fa11d33bec688a9178fac4e49eeee00" +uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a" +version = "0.4.2" + +[[deps.FFMPEG_jll]] +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "PCRE2_jll", "Zlib_jll", "libaom_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] +git-tree-sha1 = "466d45dc38e15794ec7d5d63ec03d776a9aff36e" +uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5" +version = "4.4.4+1" + +[[deps.FileIO]] +deps = ["Pkg", "Requires", "UUIDs"] +git-tree-sha1 = "62ca0547a14c57e98154423419d8a342dca75ca9" +uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +version = "1.16.4" + 
+[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +version = "1.11.0" + +[[deps.FillArrays]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "6a70198746448456524cb442b8af316927ff3e1a" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "1.13.0" + + [deps.FillArrays.extensions] + FillArraysPDMatsExt = "PDMats" + FillArraysSparseArraysExt = "SparseArrays" + FillArraysStatisticsExt = "Statistics" + + [deps.FillArrays.weakdeps] + PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[deps.FixedPointNumbers]] +deps = ["Statistics"] +git-tree-sha1 = "05882d6995ae5c12bb5f36dd2ed3f61c98cbb172" +uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" +version = "0.8.5" + +[[deps.Fontconfig_jll]] +deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Zlib_jll"] +git-tree-sha1 = "db16beca600632c95fc8aca29890d83788dd8b23" +uuid = "a3f928ae-7b40-5064-980b-68af3947d34b" +version = "2.13.96+0" + +[[deps.Format]] +git-tree-sha1 = "9c68794ef81b08086aeb32eeaf33531668d5f5fc" +uuid = "1fa38f19-a742-5d3f-a2b9-30dd87b9d5f8" +version = "1.3.7" + +[[deps.ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions"] +git-tree-sha1 = "cf0fe81336da9fb90944683b8c41984b08793dad" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.36" +weakdeps = ["StaticArrays"] + + [deps.ForwardDiff.extensions] + ForwardDiffStaticArraysExt = "StaticArrays" + +[[deps.FreeType2_jll]] +deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Zlib_jll"] +git-tree-sha1 = "5c1d8ae0efc6c2e7b1fc502cbe25def8f661b7bc" +uuid = "d7e528f0-a631-5988-bf34-fe36492bcfd7" +version = "2.13.2+0" + +[[deps.FriBidi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "1ed150b39aebcc805c26b93a8d0122c940f64ce2" +uuid = 
"559328eb-81f9-559d-9380-de523a88c83c" +version = "1.0.14+0" + +[[deps.GLFW_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Xorg_libXcursor_jll", "Xorg_libXi_jll", "Xorg_libXinerama_jll", "Xorg_libXrandr_jll", "libdecor_jll", "xkbcommon_jll"] +git-tree-sha1 = "532f9126ad901533af1d4f5c198867227a7bb077" +uuid = "0656b61e-2033-5cc2-a64a-77c0f6c09b89" +version = "3.4.0+1" + +[[deps.GPUArrays]] +deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] +git-tree-sha1 = "62ee71528cca49be797076a76bdc654a170a523e" +uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" +version = "10.3.1" + +[[deps.GPUArraysCore]] +deps = ["Adapt"] +git-tree-sha1 = "ec632f177c0d990e64d955ccc1b8c04c485a0950" +uuid = "46192b85-c4d5-4398-a991-12ede77f4527" +version = "0.1.6" + +[[deps.GR]] +deps = ["Artifacts", "Base64", "DelimitedFiles", "Downloads", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Preferences", "Printf", "Qt6Wayland_jll", "Random", "Serialization", "Sockets", "TOML", "Tar", "Test", "p7zip_jll"] +git-tree-sha1 = "ee28ddcd5517d54e417182fec3886e7412d3926f" +uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" +version = "0.73.8" + +[[deps.GR_jll]] +deps = ["Artifacts", "Bzip2_jll", "Cairo_jll", "FFMPEG_jll", "Fontconfig_jll", "FreeType2_jll", "GLFW_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pixman_jll", "Qt6Base_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "f31929b9e67066bee48eec8b03c0df47d31a74b3" +uuid = "d2c73de3-f751-5644-a686-071e5b155ba9" +version = "0.73.8+0" + +[[deps.GeoFormatTypes]] +git-tree-sha1 = "59107c179a586f0fe667024c5eb7033e81333271" +uuid = "68eda718-8dee-11e9-39e7-89f7f65f511f" +version = "0.4.2" + +[[deps.GeoInterface]] +deps = ["Extents", "GeoFormatTypes"] +git-tree-sha1 = "2f6fce56cdb8373637a6614e14a5768a88450de2" +uuid = "cf35fbd7-0cd7-5166-be24-54bfbe79505f" +version = "1.3.7" + +[[deps.GeometryBasics]] +deps = ["EarCut_jll", "Extents", "GeoInterface", 
"IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"] +git-tree-sha1 = "b62f2b2d76cee0d61a2ef2b3118cd2a3215d3134" +uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326" +version = "0.4.11" + +[[deps.GeometryTypes]] +deps = ["ColorTypes", "FixedPointNumbers", "LinearAlgebra", "StaticArrays"] +git-tree-sha1 = "d796f7be0383b5416cd403420ce0af083b0f9b28" +uuid = "4d00f742-c7ba-57c2-abde-4428a4b178cb" +version = "0.8.5" + +[[deps.Gettext_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "9b02998aba7bf074d14de89f9d37ca24a1a0b046" +uuid = "78b55507-aeef-58d4-861c-77aaff3498b1" +version = "0.21.0+0" + +[[deps.Git]] +deps = ["Git_jll"] +git-tree-sha1 = "04eff47b1354d702c3a85e8ab23d539bb7d5957e" +uuid = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2" +version = "1.3.1" + +[[deps.Git_jll]] +deps = ["Artifacts", "Expat_jll", "JLLWrappers", "LibCURL_jll", "Libdl", "Libiconv_jll", "OpenSSL_jll", "PCRE2_jll", "Zlib_jll"] +git-tree-sha1 = "ea372033d09e4552a04fd38361cd019f9003f4f4" +uuid = "f8c6e375-362e-5223-8a59-34ff63f689eb" +version = "2.46.2+0" + +[[deps.Glib_jll]] +deps = ["Artifacts", "Gettext_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE2_jll", "Zlib_jll"] +git-tree-sha1 = "674ff0db93fffcd11a3573986e550d66cd4fd71f" +uuid = "7746bdde-850d-59dc-9ae8-88ece973131d" +version = "2.80.5+0" + +[[deps.GraphRecipes]] +deps = ["AbstractTrees", "GeometryTypes", "Graphs", "InteractiveUtils", "Interpolations", "LinearAlgebra", "NaNMath", "NetworkLayout", "PlotUtils", "RecipesBase", "SparseArrays", "Statistics"] +git-tree-sha1 = "10920601dc51d2231bb3d2111122045efed8def0" +uuid = "bd48cda9-67a9-57be-86fa-5b3c104eda73" +version = "0.5.13" + +[[deps.Graphite2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "344bf40dcab1073aca04aa0df4fb092f920e4011" +uuid = "3b182d85-2403-5c21-9c21-1e1f0cc25472" +version = "1.3.14+0" + +[[deps.Graphs]] +deps = 
["ArnoldiMethod", "Compat", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] +git-tree-sha1 = "1dc470db8b1131cfc7fb4c115de89fe391b9e780" +uuid = "86223c79-3864-5bf0-83f7-82e725a168b6" +version = "1.12.0" + +[[deps.Grisu]] +git-tree-sha1 = "53bb909d1151e57e2484c3d1b53e19552b887fb2" +uuid = "42e2da0e-8278-4e71-bc24-59509adca0fe" +version = "1.0.2" + +[[deps.Gumbo]] +deps = ["AbstractTrees", "Gumbo_jll", "Libdl"] +git-tree-sha1 = "a1a138dfbf9df5bace489c7a9d5196d6afdfa140" +uuid = "708ec375-b3d6-5a57-a7ce-8257bf98657a" +version = "0.8.2" + +[[deps.Gumbo_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "29070dee9df18d9565276d68a596854b1764aa38" +uuid = "528830af-5a63-567c-a44a-034ed33b8444" +version = "0.10.2+0" + +[[deps.HTTP]] +deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] +git-tree-sha1 = "bc3f416a965ae61968c20d0ad867556367f2817d" +uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" +version = "1.10.9" + +[[deps.HarfBuzz_jll]] +deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "Graphite2_jll", "JLLWrappers", "Libdl", "Libffi_jll"] +git-tree-sha1 = "401e4f3f30f43af2c8478fc008da50096ea5240f" +uuid = "2e76f6c2-a576-52d4-95c1-20adfe4de566" +version = "8.3.1+0" + +[[deps.IOCapture]] +deps = ["Logging", "Random"] +git-tree-sha1 = "b6d6bfdd7ce25b0f9b2f6b3dd56b2673a66c8770" +uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89" +version = "0.2.5" + +[[deps.IRTools]] +deps = ["InteractiveUtils", "MacroTools"] +git-tree-sha1 = "950c3717af761bc3ff906c2e8e52bd83390b6ec2" +uuid = "7869d1d1-7146-5819-86e3-90919afe41df" +version = "0.4.14" + +[[deps.Inflate]] +git-tree-sha1 = "d1b1b796e47d94588b3757fe84fbf65a5ec4a80d" +uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" +version = "0.1.5" + 
+[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" + +[[deps.Interpolations]] +deps = ["Adapt", "AxisAlgorithms", "ChainRulesCore", "LinearAlgebra", "OffsetArrays", "Random", "Ratios", "Requires", "SharedArrays", "SparseArrays", "StaticArrays", "WoodburyMatrices"] +git-tree-sha1 = "88a101217d7cb38a7b481ccd50d21876e1d1b0e0" +uuid = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" +version = "0.15.1" +weakdeps = ["Unitful"] + + [deps.Interpolations.extensions] + InterpolationsUnitfulExt = "Unitful" + +[[deps.IrrationalConstants]] +git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2" +uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" +version = "0.2.2" + +[[deps.IterTools]] +git-tree-sha1 = "42d5f897009e7ff2cf88db414a389e5ed1bdd023" +uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" +version = "1.10.0" + +[[deps.IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[deps.JLD2]] +deps = ["FileIO", "MacroTools", "Mmap", "OrderedCollections", "PrecompileTools", "Requires", "TranscodingStreams"] +git-tree-sha1 = "783c1be5213a09609b23237a0c9e5dfd258ae6f2" +uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +version = "0.5.7" + +[[deps.JLFzf]] +deps = ["Pipe", "REPL", "Random", "fzf_jll"] +git-tree-sha1 = "39d64b09147620f5ffbf6b2d3255be3c901bec63" +uuid = "1019f520-868f-41f5-a6de-eb00f4b6a39c" +version = "0.1.8" + +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "be3dc50a92e5a386872a493a10050136d4703f9b" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.6.1" + +[[deps.JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.4" + +[[deps.JpegTurbo_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "25ee0be4d43d0269027024d75a24c24d6c6e590c" +uuid = 
"aacddb02-875f-59d6-b918-886e6ef4fbf8" +version = "3.0.4+0" + +[[deps.LAME_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "170b660facf5df5de098d866564877e119141cbd" +uuid = "c1c5ebd0-6772-5130-a774-d5fcae4a789d" +version = "3.100.2+0" + +[[deps.LERC_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "36bdbc52f13a7d1dcb0f3cd694e01677a515655b" +uuid = "88015f11-f218-50d7-93a8-a6af411a945d" +version = "4.0.0+0" + +[[deps.LLVM]] +deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Preferences", "Printf", "Unicode"] +git-tree-sha1 = "d422dfd9707bec6617335dc2ea3c5172a87d5908" +uuid = "929cbde3-209d-540e-8aea-75f648917ca0" +version = "9.1.3" + + [deps.LLVM.extensions] + BFloat16sExt = "BFloat16s" + + [deps.LLVM.weakdeps] + BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" + +[[deps.LLVMExtra_jll]] +deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] +git-tree-sha1 = "05a8bd5a42309a9ec82f700876903abce1017dd3" +uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" +version = "0.0.34+0" + +[[deps.LLVMOpenMP_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "78211fb6cbc872f77cad3fc0b6cf647d923f4929" +uuid = "1d63c593-3942-5779-bab2-d838dc0a180e" +version = "18.1.7+0" + +[[deps.LZO_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "854a9c268c43b77b0a27f22d7fab8d33cdb3a731" +uuid = "dd4b983a-f0e5-5f8d-a1b7-129d4a5fb1ac" +version = "2.10.2+1" + +[[deps.LaTeXStrings]] +git-tree-sha1 = "dda21b8cbd6a6c40d9d02a73230f9d70fed6918c" +uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" +version = "1.4.0" + +[[deps.Latexify]] +deps = ["Format", "InteractiveUtils", "LaTeXStrings", "MacroTools", "Markdown", "OrderedCollections", "Requires"] +git-tree-sha1 = "ce5f5621cac23a86011836badfedf664a612cee4" +uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" +version = "0.16.5" + + [deps.Latexify.extensions] + DataFramesExt = "DataFrames" + SparseArraysExt = "SparseArrays" + SymEngineExt = "SymEngine" + + 
[deps.Latexify.weakdeps] + DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" + SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + SymEngine = "123dc426-2d89-5057-bbad-38513e3affd8" + +[[deps.LazilyInitializedFields]] +git-tree-sha1 = "8f7f3cabab0fd1800699663533b6d5cb3fc0e612" +uuid = "0e77f7df-68c5-4e49-93ce-4cd80f5598bf" +version = "1.2.2" + +[[deps.LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" +version = "1.11.0" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.4" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "8.6.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +version = "1.11.0" + +[[deps.LibGit2_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] +uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" +version = "1.7.2+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.0+1" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +version = "1.11.0" + +[[deps.Libffi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "0b4a5d71f3e5200a7dff793393e09dfc2d874290" +uuid = "e9f186c6-92d2-5b65-8a66-fee21dc1b490" +version = "3.2.2+1" + +[[deps.Libgcrypt_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll"] +git-tree-sha1 = "8be878062e0ffa2c3f67bb58a595375eda5de80b" +uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4" +version = "1.11.0+0" + +[[deps.Libglvnd_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll"] +git-tree-sha1 = "6f73d1dd803986947b2c750138528a999a6c7733" +uuid = "7e76a0d4-f3c7-5321-8279-8d96eeed0f29" +version = "1.6.0+0" + 
+[[deps.Libgpg_error_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "c6ce1e19f3aec9b59186bdf06cdf3c4fc5f5f3e6" +uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8" +version = "1.50.0+0" + +[[deps.Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "61dfdba58e585066d8bce214c5a51eaa0539f269" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.17.0+1" + +[[deps.Libmount_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "0c4f9c4f1a50d8f35048fa0532dabbadf702f81e" +uuid = "4b2f31a3-9ecc-558c-b454-b3730dcb73e9" +version = "2.40.1+0" + +[[deps.Libtiff_jll]] +deps = ["Artifacts", "JLLWrappers", "JpegTurbo_jll", "LERC_jll", "Libdl", "XZ_jll", "Zlib_jll", "Zstd_jll"] +git-tree-sha1 = "b404131d06f7886402758c9ce2214b636eb4d54a" +uuid = "89763e89-9b03-5906-acba-b20f662cd828" +version = "4.7.0+0" + +[[deps.Libuuid_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "5ee6203157c120d79034c748a2acba45b82b8807" +uuid = "38a345b3-de98-5d2b-a5d3-14cd9215e700" +version = "2.40.1+0" + +[[deps.LinearAlgebra]] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +version = "1.11.0" + +[[deps.LiveServer]] +deps = ["HTTP", "LoggingExtras", "MIMEs", "Sockets", "Test"] +git-tree-sha1 = "564a436267fb1fc768f815dad64c4386c46623f8" +uuid = "16fef848-5104-11e9-1b77-fb7a48bbb589" +version = "1.4.0" + +[[deps.LogExpFunctions]] +deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] +git-tree-sha1 = "a2d09619db4e765091ee5c6ffe8872849de0feea" +uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +version = "0.3.28" + + [deps.LogExpFunctions.extensions] + LogExpFunctionsChainRulesCoreExt = "ChainRulesCore" + LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables" + LogExpFunctionsInverseFunctionsExt = "InverseFunctions" + + [deps.LogExpFunctions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + ChangesOfVariables = 
"9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" + InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" + +[[deps.LoggingExtras]] +deps = ["Dates", "Logging"] +git-tree-sha1 = "f02b56007b064fbfddb4c9cd60161b6dd0f40df3" +uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" +version = "1.1.0" + +[[deps.MIMEs]] +git-tree-sha1 = "65f28ad4b594aebe22157d6fac869786a255b7eb" +uuid = "6c6e2e6c-3030-632d-7369-2d6c69616d65" +version = "0.1.4" + +[[deps.MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "2fa9ee3e63fd3a4f7a9a4f4744a52f4856de82df" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.13" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" + +[[deps.MarkdownAST]] +deps = ["AbstractTrees", "Markdown"] +git-tree-sha1 = "465a70f0fc7d443a00dcdc3267a497397b8a3899" +uuid = "d0879d2d-cac2-40c8-9cee-1863dc0c7391" +version = "0.1.2" + +[[deps.MbedTLS]] +deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"] +git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf" +uuid = "739be429-bea8-5141-9913-cc70e7f3736d" +version = "1.1.9" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.6+0" + +[[deps.Measures]] +git-tree-sha1 = "c13304c81eec1ed3af7fc20e75fb6b26092a1102" +uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e" +version = "0.3.2" + +[[deps.Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "ec4f7fbeab05d7747bdf98eb74d130a2a2ed298d" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "1.2.0" + +[[deps.Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" +version = "1.11.0" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2023.12.12" + +[[deps.NaNMath]] +deps = ["OpenLibm_jll"] +git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" +uuid = 
"77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "1.0.2" + +[[deps.NetworkLayout]] +deps = ["GeometryBasics", "LinearAlgebra", "Random", "Requires", "StaticArrays"] +git-tree-sha1 = "0c51e19351dc1eecc61bc23caaf2262e7ba71973" +uuid = "46757867-2c16-5918-afeb-47bfcb05e46a" +version = "0.4.7" +weakdeps = ["Graphs"] + + [deps.NetworkLayout.extensions] + NetworkLayoutGraphsExt = "Graphs" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.NodeJS_20_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "0b1b4a83773cfdefa5d9dc0322e4c0624ce88b5b" +uuid = "c7aee132-11e1-519c-8219-0a43005e73c2" +version = "20.12.2+0" + +[[deps.OffsetArrays]] +git-tree-sha1 = "1a27764e945a152f7ca7efa04de513d473e9542e" +uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +version = "1.14.1" +weakdeps = ["Adapt"] + + [deps.OffsetArrays.extensions] + OffsetArraysAdaptExt = "Adapt" + +[[deps.Ogg_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "887579a3eb005446d514ab7aeac5d1d027658b8f" +uuid = "e7412a2a-1a6e-54c0-be00-318e2571c051" +version = "1.3.5+1" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.27+1" + +[[deps.OpenLibm_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "05823500-19ac-5b8b-9628-191a04bc5112" +version = "0.8.1+2" + +[[deps.OpenSSH_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "OpenSSL_jll", "Zlib_jll"] +git-tree-sha1 = "cbb7bdfca123d91205ba9341bbaabe700934078d" +uuid = "9bd350c2-7e96-507f-8002-3f2e150b4e1b" +version = "9.9.1+1" + +[[deps.OpenSSL]] +deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"] +git-tree-sha1 = "38cb508d080d21dc1128f7fb04f20387ed4c0af4" +uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c" +version = "1.4.3" + +[[deps.OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "7493f61f55a6cce7325f197443aa80d32554ba10" +uuid = 
"458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "3.0.15+1" + +[[deps.OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.5+0" + +[[deps.Opus_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "6703a85cb3781bd5909d48730a67205f3f31a575" +uuid = "91d4177d-7536-5919-b921-800302f37372" +version = "1.3.3+0" + +[[deps.OrderedCollections]] +git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.6.3" + +[[deps.PCRE2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" +version = "10.42.0+1" + +[[deps.Pango_jll]] +deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "FriBidi_jll", "Glib_jll", "HarfBuzz_jll", "JLLWrappers", "Libdl"] +git-tree-sha1 = "e127b609fb9ecba6f201ba7ab753d5a605d53801" +uuid = "36c8627f-9965-5494-a995-c6b170f724f3" +version = "1.54.1+0" + +[[deps.Parsers]] +deps = ["Dates", "PrecompileTools", "UUIDs"] +git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "2.8.1" + +[[deps.Pipe]] +git-tree-sha1 = "6842804e7867b115ca9de748a0cf6b364523c16d" +uuid = "b98c9c47-44ae-5843-9183-064241ee97a0" +version = "1.3.0" + +[[deps.Pixman_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "Libdl"] +git-tree-sha1 = "35621f10a7531bc8fa58f74610b1bfb70a3cfc6b" +uuid = "30392449-352a-5448-841d-b1acce4e97dc" +version = "0.43.4+0" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.11.0" +weakdeps = ["REPL"] + + [deps.Pkg.extensions] + REPLExt = "REPL" + +[[deps.PlotThemes]] +deps = 
["PlotUtils", "Statistics"] +git-tree-sha1 = "41031ef3a1be6f5bbbf3e8073f210556daeae5ca" +uuid = "ccf2f8ad-2431-5c83-bf29-c5338b663b6a" +version = "3.3.0" + +[[deps.PlotUtils]] +deps = ["ColorSchemes", "Colors", "Dates", "PrecompileTools", "Printf", "Random", "Reexport", "StableRNGs", "Statistics"] +git-tree-sha1 = "650a022b2ce86c7dcfbdecf00f78afeeb20e5655" +uuid = "995b91a9-d308-5afd-9ec6-746e21dbc043" +version = "1.4.2" + +[[deps.Plots]] +deps = ["Base64", "Contour", "Dates", "Downloads", "FFMPEG", "FixedPointNumbers", "GR", "JLFzf", "JSON", "LaTeXStrings", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "Pkg", "PlotThemes", "PlotUtils", "PrecompileTools", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "RelocatableFolders", "Requires", "Scratch", "Showoff", "SparseArrays", "Statistics", "StatsBase", "TOML", "UUIDs", "UnicodeFun", "UnitfulLatexify", "Unzip"] +git-tree-sha1 = "45470145863035bb124ca51b320ed35d071cc6c2" +uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +version = "1.40.8" + + [deps.Plots.extensions] + FileIOExt = "FileIO" + GeometryBasicsExt = "GeometryBasics" + IJuliaExt = "IJulia" + ImageInTerminalExt = "ImageInTerminal" + UnitfulExt = "Unitful" + + [deps.Plots.weakdeps] + FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" + GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326" + IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a" + ImageInTerminal = "d8c32880-2388-543b-8c61-d9f865259254" + Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.2.1" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.3" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" + +[[deps.Profile]] +uuid = 
"9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" +version = "1.11.0" + +[[deps.Qt6Base_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "OpenSSL_jll", "Vulkan_Loader_jll", "Xorg_libSM_jll", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Xorg_libxcb_jll", "Xorg_xcb_util_cursor_jll", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_keysyms_jll", "Xorg_xcb_util_renderutil_jll", "Xorg_xcb_util_wm_jll", "Zlib_jll", "libinput_jll", "xkbcommon_jll"] +git-tree-sha1 = "492601870742dcd38f233b23c3ec629628c1d724" +uuid = "c0090381-4147-56d7-9ebc-da0b1113ec56" +version = "6.7.1+1" + +[[deps.Qt6Declarative_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Qt6Base_jll", "Qt6ShaderTools_jll"] +git-tree-sha1 = "e5dd466bf2569fe08c91a2cc29c1003f4797ac3b" +uuid = "629bc702-f1f5-5709-abd5-49b8460ea067" +version = "6.7.1+2" + +[[deps.Qt6ShaderTools_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Qt6Base_jll"] +git-tree-sha1 = "1a180aeced866700d4bebc3120ea1451201f16bc" +uuid = "ce943373-25bb-56aa-8eca-768745ed7b5a" +version = "6.7.1+1" + +[[deps.Qt6Wayland_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Qt6Base_jll", "Qt6Declarative_jll"] +git-tree-sha1 = "729927532d48cf79f49070341e1d918a65aba6b0" +uuid = "e99dba38-086e-5de3-a5b1-6e4c66e897c3" +version = "6.7.1+1" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "StyledStrings", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" +version = "1.11.0" + +[[deps.Random]] +deps = ["SHA"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" + +[[deps.Ratios]] +deps = ["Requires"] +git-tree-sha1 = "1342a47bf3260ee108163042310d26f2be5ec90b" +uuid = "c84ed2f1-dad5-54f0-aa8e-dbefe2724439" +version = "0.4.5" +weakdeps = ["FixedPointNumbers"] + + [deps.Ratios.extensions] + RatiosFixedPointNumbersExt = "FixedPointNumbers" + +[[deps.RealDot]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "9f0a1b71baaf7650f4fa8a1d168c7fb6ee41f0c9" +uuid = 
"c1ae055f-0cd5-4b69-90a6-9a35b1a98df9" +version = "0.1.0" + +[[deps.RecipesBase]] +deps = ["PrecompileTools"] +git-tree-sha1 = "5c3d09cc4f31f5fc6af001c250bf1278733100ff" +uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" +version = "1.3.4" + +[[deps.RecipesPipeline]] +deps = ["Dates", "NaNMath", "PlotUtils", "PrecompileTools", "RecipesBase"] +git-tree-sha1 = "45cf9fd0ca5839d06ef333c8201714e888486342" +uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c" +version = "0.6.12" + +[[deps.Reexport]] +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.2.2" + +[[deps.RegistryInstances]] +deps = ["LazilyInitializedFields", "Pkg", "TOML", "Tar"] +git-tree-sha1 = "ffd19052caf598b8653b99404058fce14828be51" +uuid = "2792f1a3-b283-48e8-9a74-f99dce5104f3" +version = "0.1.0" + +[[deps.RelocatableFolders]] +deps = ["SHA", "Scratch"] +git-tree-sha1 = "ffdaf70d81cf6ff22c2b6e733c900c3321cab864" +uuid = "05181044-ff0b-4ac5-8273-598c1e38db00" +version = "1.0.1" + +[[deps.Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.3.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.Sass]] +deps = ["libsass_jll"] +git-tree-sha1 = "aa841c3738cec78b5dbccd56dda332710f35f6a5" +uuid = "322a6be2-4ae8-5d68-aaf1-3e960788d1d9" +version = "0.2.0" + +[[deps.Scientific_Programming_in_Julia]] +deps = ["BenchmarkTools", "LinearAlgebra", "StatsBase"] +path = ".." 
+uuid = "ff288558-a322-4c39-84bc-85bf6acf7e03" +version = "0.1.0" + +[[deps.Scratch]] +deps = ["Dates"] +git-tree-sha1 = "3bac05bc7e74a75fd9cba4295cde4045d9fe2386" +uuid = "6c6a2e73-6563-6170-7368-637461726353" +version = "1.2.1" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" + +[[deps.SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" +version = "1.11.0" + +[[deps.Showoff]] +deps = ["Dates", "Grisu"] +git-tree-sha1 = "91eddf657aca81df9ae6ceb20b959ae5653ad1de" +uuid = "992d4aef-0814-514b-bc4d-f2e9a6c4116f" +version = "1.0.3" + +[[deps.SimpleBufferStream]] +git-tree-sha1 = "f305871d2f381d21527c770d4788c06c097c9bc1" +uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7" +version = "1.2.0" + +[[deps.SimpleTraits]] +deps = ["InteractiveUtils", "MacroTools"] +git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231" +uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" +version = "0.9.4" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" +version = "1.11.0" + +[[deps.SoftPosit]] +git-tree-sha1 = "a5abcfe620c577d1e89fc4b0efdb0aa9e117989c" +uuid = "0775deef-a35f-56d7-82da-cfc52f91364d" +version = "0.5.3" + +[[deps.SortingAlgorithms]] +deps = ["DataStructures"] +git-tree-sha1 = "66e0a8e672a0bdfca2c3f5937efb8538b9ddc085" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "1.2.1" + +[[deps.SparseArrays]] +deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +version = "1.11.0" + +[[deps.SparseInverseSubset]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "52962839426b75b3021296f7df242e40ecfc0852" +uuid = "dc90abb0-5640-4711-901d-7e5b23a2fada" +version = "0.1.2" + +[[deps.SpecialFunctions]] +deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] +git-tree-sha1 = "2f5d4697f21388cbe1ff299430dd169ef97d7e14" +uuid = 
"276daf66-3868-5448-9aa4-cd146d93841b" +version = "2.4.0" +weakdeps = ["ChainRulesCore"] + + [deps.SpecialFunctions.extensions] + SpecialFunctionsChainRulesCoreExt = "ChainRulesCore" + +[[deps.StableRNGs]] +deps = ["Random"] +git-tree-sha1 = "83e6cce8324d49dfaf9ef059227f91ed4441a8e5" +uuid = "860ef19b-820b-49d6-a774-d7a799459cd3" +version = "1.0.2" + +[[deps.StaticArrays]] +deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] +git-tree-sha1 = "777657803913ffc7e8cc20f0fd04b634f871af8f" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.9.8" +weakdeps = ["ChainRulesCore", "Statistics"] + + [deps.StaticArrays.extensions] + StaticArraysChainRulesCoreExt = "ChainRulesCore" + StaticArraysStatisticsExt = "Statistics" + +[[deps.StaticArraysCore]] +git-tree-sha1 = "192954ef1208c7019899fbf8049e717f92959682" +uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" +version = "1.4.3" + +[[deps.Statistics]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "ae3bb1eb3bba077cd276bc5cfc337cc65c3075c0" +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.11.1" +weakdeps = ["SparseArrays"] + + [deps.Statistics.extensions] + SparseArraysExt = ["SparseArrays"] + +[[deps.StatsAPI]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "1ff449ad350c9c4cbc756624d6f8a8c3ef56d3ed" +uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" +version = "1.7.0" + +[[deps.StatsBase]] +deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] +git-tree-sha1 = "5cf7606d6cef84b543b483848d4ae08ad9832b21" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.34.3" + +[[deps.StructArrays]] +deps = ["ConstructionBase", "DataAPI", "Tables"] +git-tree-sha1 = "f4dc295e983502292c4c3f951dbb4e985e35b3be" +uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" +version = "0.6.18" +weakdeps = ["Adapt", "GPUArraysCore", "SparseArrays", "StaticArrays"] + + [deps.StructArrays.extensions] + 
StructArraysAdaptExt = "Adapt" + StructArraysGPUArraysCoreExt = "GPUArraysCore" + StructArraysSparseArraysExt = "SparseArrays" + StructArraysStaticArraysExt = "StaticArrays" + +[[deps.StyledStrings]] +uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b" +version = "1.11.0" + +[[deps.SuiteSparse]] +deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] +uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" + +[[deps.SuiteSparse_jll]] +deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] +uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "7.7.0+0" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.1" + +[[deps.Tables]] +deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "OrderedCollections", "TableTraits"] +git-tree-sha1 = "598cd7c1f68d1e205689b1c2fe65a9f85846f297" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.12.0" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.TensorCore]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "1feb45f88d133a655e001435632f019a9a1bcdb6" +uuid = "62fd8b95-f654-4bbd-a8a5-9c27f68ccd50" +version = "0.1.1" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" + +[[deps.TranscodingStreams]] +git-tree-sha1 = "0c45878dcfdcfa8480052b6ab162cdd138781742" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.11.3" + +[[deps.URIs]] +git-tree-sha1 = "67db6cc7b3821e19ebe75791a9dd19c9b1188f2b" +uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" +version = "1.5.1" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +version = "1.11.0" + +[[deps.Umlaut]] +deps = ["ExprTools", 
"LinearAlgebra", "Statistics", "Test"] +git-tree-sha1 = "1eb4e1c4cd1a5f141dd13a05db6f400a4904689d" +uuid = "92992a2b-8ce5-4a9c-bb9d-58be9a7dc841" +version = "0.7.0" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" +version = "1.11.0" + +[[deps.UnicodeFun]] +deps = ["REPL"] +git-tree-sha1 = "53915e50200959667e78a92a418594b428dffddf" +uuid = "1cfade01-22cf-5700-b092-accc4b62d6e1" +version = "0.4.1" + +[[deps.Unitful]] +deps = ["Dates", "LinearAlgebra", "Random"] +git-tree-sha1 = "d95fe458f26209c66a187b1114df96fd70839efd" +uuid = "1986cc42-f94f-5a68-af5c-568840ba703d" +version = "1.21.0" + + [deps.Unitful.extensions] + ConstructionBaseUnitfulExt = "ConstructionBase" + InverseFunctionsUnitfulExt = "InverseFunctions" + + [deps.Unitful.weakdeps] + ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" + InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.UnitfulLatexify]] +deps = ["LaTeXStrings", "Latexify", "Unitful"] +git-tree-sha1 = "975c354fcd5f7e1ddcc1f1a23e6e091d99e99bc8" +uuid = "45397f5d-5981-4c77-b2b3-fc36d6e9b728" +version = "1.6.4" + +[[deps.Unzip]] +git-tree-sha1 = "ca0969166a028236229f63514992fc073799bb78" +uuid = "41fe7b60-77ed-43a1-b4f0-825fd5a5650d" +version = "0.2.0" + +[[deps.Vulkan_Loader_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Wayland_jll", "Xorg_libX11_jll", "Xorg_libXrandr_jll", "xkbcommon_jll"] +git-tree-sha1 = "2f0486047a07670caad3a81a075d2e518acc5c59" +uuid = "a44049a8-05dd-5a78-86c9-5fde0876e88c" +version = "1.3.243+0" + +[[deps.Wayland_jll]] +deps = ["Artifacts", "EpollShim_jll", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "7558e29847e99bc3f04d6569e82d0f5c54460703" +uuid = "a2964d1f-97da-50d4-b82a-358c7fce9d89" +version = "1.21.0+1" + +[[deps.Wayland_protocols_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "93f43ab61b16ddfb2fd3bb13b3ce241cafb0e6c9" +uuid = "2381bf8a-dfd0-557d-9999-79630e7b1b91" +version = "1.31.0+0" + 
+[[deps.WoodburyMatrices]] +deps = ["LinearAlgebra", "SparseArrays"] +git-tree-sha1 = "c1a7aa6219628fcd757dede0ca95e245c5cd9511" +uuid = "efce3f68-66dc-5838-9240-27a6d6f5f9b6" +version = "1.0.0" + +[[deps.XML2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Zlib_jll"] +git-tree-sha1 = "6a451c6f33a176150f315726eba8b92fbfdb9ae7" +uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" +version = "2.13.4+0" + +[[deps.XSLT_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Libgpg_error_jll", "Libiconv_jll", "XML2_jll", "Zlib_jll"] +git-tree-sha1 = "a54ee957f4c86b526460a720dbc882fa5edcbefc" +uuid = "aed1982a-8fda-507f-9586-7b0439959a61" +version = "1.1.41+0" + +[[deps.XZ_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "15e637a697345f6743674f1322beefbc5dcd5cfc" +uuid = "ffd25f8a-64ca-5728-b0f7-c24cf3aae800" +version = "5.6.3+0" + +[[deps.Xorg_libICE_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "326b4fea307b0b39892b3e85fa451692eda8d46c" +uuid = "f67eecfb-183a-506d-b269-f58e52b52d7c" +version = "1.1.1+0" + +[[deps.Xorg_libSM_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libICE_jll"] +git-tree-sha1 = "3796722887072218eabafb494a13c963209754ce" +uuid = "c834827a-8449-5923-a945-d239c165b7dd" +version = "1.2.4+0" + +[[deps.Xorg_libX11_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libxcb_jll", "Xorg_xtrans_jll"] +git-tree-sha1 = "afead5aba5aa507ad5a3bf01f58f82c8d1403495" +uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc" +version = "1.8.6+0" + +[[deps.Xorg_libXau_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "6035850dcc70518ca32f012e46015b9beeda49d8" +uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec" +version = "1.0.11+0" + +[[deps.Xorg_libXcursor_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXfixes_jll", "Xorg_libXrender_jll"] +git-tree-sha1 = "12e0eb3bc634fa2080c1c37fccf56f7c22989afd" +uuid = "935fb764-8cf2-53bf-bb30-45bb1f8bf724" +version = 
"1.2.0+4" + +[[deps.Xorg_libXdmcp_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "34d526d318358a859d7de23da945578e8e8727b7" +uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05" +version = "1.1.4+0" + +[[deps.Xorg_libXext_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libX11_jll"] +git-tree-sha1 = "d2d1a5c49fae4ba39983f63de6afcbea47194e85" +uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3" +version = "1.3.6+0" + +[[deps.Xorg_libXfixes_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "0e0dc7431e7a0587559f9294aeec269471c991a4" +uuid = "d091e8ba-531a-589c-9de9-94069b037ed8" +version = "5.0.3+4" + +[[deps.Xorg_libXi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXfixes_jll"] +git-tree-sha1 = "89b52bc2160aadc84d707093930ef0bffa641246" +uuid = "a51aa0fd-4e3c-5386-b890-e753decda492" +version = "1.7.10+4" + +[[deps.Xorg_libXinerama_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll"] +git-tree-sha1 = "26be8b1c342929259317d8b9f7b53bf2bb73b123" +uuid = "d1454406-59df-5ea1-beac-c340f2130bc3" +version = "1.1.4+4" + +[[deps.Xorg_libXrandr_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll"] +git-tree-sha1 = "34cea83cb726fb58f325887bf0612c6b3fb17631" +uuid = "ec84b674-ba8e-5d96-8ba1-2a689ba10484" +version = "1.5.2+4" + +[[deps.Xorg_libXrender_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libX11_jll"] +git-tree-sha1 = "47e45cd78224c53109495b3e324df0c37bb61fbe" +uuid = "ea2f1a96-1ddc-540d-b46f-429655e07cfa" +version = "0.9.11+0" + +[[deps.Xorg_libpthread_stubs_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "8fdda4c692503d44d04a0603d9ac0982054635f9" +uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74" +version = "0.1.1+0" + +[[deps.Xorg_libxcb_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"] 
+git-tree-sha1 = "bcd466676fef0878338c61e655629fa7bbc69d8e" +uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b" +version = "1.17.0+0" + +[[deps.Xorg_libxkbfile_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libX11_jll"] +git-tree-sha1 = "730eeca102434283c50ccf7d1ecdadf521a765a4" +uuid = "cc61e674-0454-545c-8b26-ed2c68acab7a" +version = "1.1.2+0" + +[[deps.Xorg_xcb_util_cursor_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_jll", "Xorg_xcb_util_renderutil_jll"] +git-tree-sha1 = "04341cb870f29dcd5e39055f895c39d016e18ccd" +uuid = "e920d4aa-a673-5f3a-b3d7-f755a4d47c43" +version = "0.1.4+0" + +[[deps.Xorg_xcb_util_image_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "0fab0a40349ba1cba2c1da699243396ff8e94b97" +uuid = "12413925-8142-5f55-bb0e-6d7ca50bb09b" +version = "0.4.0+1" + +[[deps.Xorg_xcb_util_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll"] +git-tree-sha1 = "e7fd7b2881fa2eaa72717420894d3938177862d1" +uuid = "2def613f-5ad1-5310-b15b-b15d46f528f5" +version = "0.4.0+1" + +[[deps.Xorg_xcb_util_keysyms_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "d1151e2c45a544f32441a567d1690e701ec89b00" +uuid = "975044d2-76e6-5fbe-bf08-97ce7c6574c7" +version = "0.4.0+1" + +[[deps.Xorg_xcb_util_renderutil_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "dfd7a8f38d4613b6a575253b3174dd991ca6183e" +uuid = "0d47668e-0667-5a69-a72c-f761630bfb7e" +version = "0.3.9+1" + +[[deps.Xorg_xcb_util_wm_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "e78d10aab01a4a154142c5006ed44fd9e8e31b67" +uuid = "c22f9ab0-d5fe-5066-847c-f4bb1cd4e361" +version = "0.4.1+1" + +[[deps.Xorg_xkbcomp_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libxkbfile_jll"] +git-tree-sha1 = "330f955bc41bb8f5270a369c473fc4a5a4e4d3cb" +uuid = 
"35661453-b289-5fab-8a00-3d9160c6a3a4" +version = "1.4.6+0" + +[[deps.Xorg_xkeyboard_config_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_xkbcomp_jll"] +git-tree-sha1 = "691634e5453ad362044e2ad653e79f3ee3bb98c3" +uuid = "33bec58e-1273-512f-9401-5d533626f822" +version = "2.39.0+0" + +[[deps.Xorg_xtrans_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "e92a1a012a10506618f10b7047e478403a046c77" +uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10" +version = "1.5.0+0" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+1" + +[[deps.Zstd_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "555d1076590a6cc2fdee2ef1469451f872d8b41b" +uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" +version = "1.5.6+1" + +[[deps.Zygote]] +deps = ["AbstractFFTs", "ChainRules", "ChainRulesCore", "DiffRules", "Distributed", "FillArrays", "ForwardDiff", "GPUArrays", "GPUArraysCore", "IRTools", "InteractiveUtils", "LinearAlgebra", "LogExpFunctions", "MacroTools", "NaNMath", "PrecompileTools", "Random", "Requires", "SparseArrays", "SpecialFunctions", "Statistics", "ZygoteRules"] +git-tree-sha1 = "f816633be6dc5c0ed9ffedda157ecfda0b3b6a69" +uuid = "e88e6eb3-aa80-5325-afca-941959d7151f" +version = "0.6.72" + + [deps.Zygote.extensions] + ZygoteColorsExt = "Colors" + ZygoteDistancesExt = "Distances" + ZygoteTrackerExt = "Tracker" + + [deps.Zygote.weakdeps] + Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" + Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.ZygoteRules]] +deps = ["ChainRulesCore", "MacroTools"] +git-tree-sha1 = "27798139afc0a2afa7b1824c206d5e87ea587a00" +uuid = "700de1a5-db45-46bc-99cf-38207098b444" +version = "0.2.5" + +[[deps.eudev_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "gperf_jll"] +git-tree-sha1 = "431b678a28ebb559d224c0b6b6d01afce87c51ba" +uuid = "35ca27e7-8b34-5b7f-bca9-bdc33f59eb06" +version = "3.2.9+0" + 
+[[deps.fzf_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "936081b536ae4aa65415d869287d43ef3cb576b2" +uuid = "214eeab7-80f7-51ab-84ad-2988db7cef09" +version = "0.53.0+0" + +[[deps.gperf_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "3516a5630f741c9eecb3720b1ec9d8edc3ecc033" +uuid = "1a1c6b14-54f6-533d-8383-74cd7377aa70" +version = "3.1.1+0" + +[[deps.libaom_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "1827acba325fdcdf1d2647fc8d5301dd9ba43a9d" +uuid = "a4ae2306-e953-59d6-aa16-d00cac43593b" +version = "3.9.0+0" + +[[deps.libass_jll]] +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "HarfBuzz_jll", "JLLWrappers", "Libdl", "Zlib_jll"] +git-tree-sha1 = "e17c115d55c5fbb7e52ebedb427a0dca79d4484e" +uuid = "0ac62f75-1d6f-5e53-bd7c-93b484bb37c0" +version = "0.15.2+0" + +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.11.0+0" + +[[deps.libdecor_jll]] +deps = ["Artifacts", "Dbus_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pango_jll", "Wayland_jll", "xkbcommon_jll"] +git-tree-sha1 = "9bf7903af251d2050b467f76bdbe57ce541f7f4f" +uuid = "1183f4f0-6f2a-5f1a-908b-139f9cdfea6f" +version = "0.2.2+0" + +[[deps.libevdev_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "141fe65dc3efabb0b1d5ba74e91f6ad26f84cc22" +uuid = "2db6ffa8-e38f-5e21-84af-90c45d0032cc" +version = "1.11.0+0" + +[[deps.libfdk_aac_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "8a22cf860a7d27e4f3498a0fe0811a7957badb38" +uuid = "f638f0a6-7fb0-5443-88ba-1cc74229b280" +version = "2.0.3+0" + +[[deps.libinput_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "eudev_jll", "libevdev_jll", "mtdev_jll"] +git-tree-sha1 = "ad50e5b90f222cfe78aa3d5183a20a12de1322ce" +uuid = "36db933b-70db-51c0-b978-0f229ee0e533" +version = "1.18.0+0" + +[[deps.libpng_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", 
"Zlib_jll"] +git-tree-sha1 = "b70c870239dc3d7bc094eb2d6be9b73d27bef280" +uuid = "b53b4c65-9356-5827-b1ea-8c7a1a84506f" +version = "1.6.44+0" + +[[deps.libsass_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "6044ffe7e7bf0602e2039dc747c3332a097ac74b" +uuid = "47bcb7c8-5119-555a-9eeb-0afcc36cd728" +version = "3.6.6+0" + +[[deps.libvorbis_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Ogg_jll", "Pkg"] +git-tree-sha1 = "490376214c4721cdaca654041f635213c6165cb3" +uuid = "f27f6e37-5d2b-51aa-960f-b287f2bc3b7a" +version = "1.3.7+2" + +[[deps.mtdev_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "814e154bdb7be91d78b6802843f76b6ece642f11" +uuid = "009596ad-96f7-51b1-9f1b-5ce2d5e8a71e" +version = "1.1.6+0" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.59.0+0" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+2" + +[[deps.x264_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "4fea590b89e6ec504593146bf8b988b2c00922b2" +uuid = "1270edf5-f2f9-52d2-97e9-ab00b5d0237a" +version = "2021.5.5+0" + +[[deps.x265_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ee567a171cce03570d77ad3a43e90218e38937a9" +uuid = "dfaa095f-4041-5dcd-9319-2fabd8486b76" +version = "3.5.0+0" + +[[deps.xkbcommon_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Wayland_jll", "Wayland_protocols_jll", "Xorg_libxcb_jll", "Xorg_xkeyboard_config_jll"] +git-tree-sha1 = "9c304562909ab2bab0262639bd4f444d7bc2be37" +uuid = "d8fb68d0-12a3-5cfd-a85a-d49703b185fd" +version = "1.4.1+1" diff --git a/docs_vitepress/Project.toml b/docs_vitepress/Project.toml new file mode 100644 index 00000000..b2c2484c --- /dev/null +++ b/docs_vitepress/Project.toml @@ -0,0 +1,21 @@ +[deps] +AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +BenchmarkTools = 
"6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8" +DocumenterVitepress = "4710194d-e776-4893-9690-8d956a29c365" +GraphRecipes = "bd48cda9-67a9-57be-86fa-5b3c104eda73" +IRTools = "7869d1d1-7146-5819-86e3-90919afe41df" +JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" +LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589" +MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Scientific_Programming_in_Julia = "ff288558-a322-4c39-84bc-85bf6acf7e03" +SoftPosit = "0775deef-a35f-56d7-82da-cfc52f91364d" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Umlaut = "92992a2b-8ce5-4a9c-bb9d-58be9a7dc841" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + +[compat] +julia = "1" diff --git a/docs_vitepress/make.jl b/docs_vitepress/make.jl new file mode 100644 index 00000000..7b60f87b --- /dev/null +++ b/docs_vitepress/make.jl @@ -0,0 +1,135 @@ +using Documenter, DocumenterVitepress +using Documenter.Remotes + +using Scientific_Programming_in_Julia + +# This is needed for live preview +if get(ENV, "VITREPRESS_LIVE_PREVIEW", "false") == "true" + VITREPRESS_KWARGS = (; + md_output_path=".", + build_vitepress=false, + ) + MAKEDOCS_KWARGS = (; clean=false,) +else + VITREPRESS_KWARGS = (;) + MAKEDOCS_KWARGS = (;) +end + +@show VITREPRESS_KWARGS +@show MAKEDOCS_KWARGS + +# utilities +function add_prefix(prefix::S, pair::Pair{S,T}) where {S<:AbstractString,T} + key, val = pair + if isa(val, AbstractString) + return key => joinpath(prefix, val) + else + return key => add_prefix(prefix, val) + end +end + +function add_prefix(prefix::AbstractString, pairs::AbstractVector{<:Pair}) + return add_prefix.(prefix, pairs) +end + +# pages +pages = [ + "Home" => "index.md", + "Tutorials" => add_prefix("./tutorials", [ + "Installation" => "installation.md", + ]), + "Projects" => add_prefix("./projects", [ + 
"Requirements" => "requirements.md", + "Potential projects" => "projects.md", + ]), + "Lectures" => add_prefix("./lectures", [ + "Outline" => "outline.md", + "1: Introduction" => add_prefix("lecture_01", [ + "Motivation" => "motivation.md", + "Basics" => "basics.md", + "Examples" => "demo.md", + "Lab" => "lab.md", + "Homework" => "hw.md", + ]), + "2: The power of type system & multiple dispatch" => add_prefix("lecture_02", [ + "Lecture" => "lecture.md", + "Lab" => "lab.md", + "Homework" => "hw.md", + ]), + "3: Design patterns" => add_prefix("lecture_03", [ + "Lecture" => "lecture.md", + "Lab" => "lab.md", + "Homework" => "hw.md", + ]), + "4: Package development, unit tests & CI" => add_prefix("lecture_04", [ + "Lecture" => "lecture.md", + "Lab" => "lab.md", + "Homework" => "hw.md", + ]), + "5: Performance benchmarking" => add_prefix("lecture_05", [ + "Lecture" => "lecture.md", + "Lab" => "lab.md", + "Homework" => "hw.md", + ]), + "6: Lanuage introspection" => add_prefix("lecture_06", [ + "Lecture" => "lecture.md", + "Lab" => "lab.md", + "Homework" => "hw.md", + ]), + "7: Macros" => add_prefix("lecture_07", [ + "Lecture" => "lecture.md", + "Lab" => "lab.md", + "Homework" => "hw.md", + ]), + "8: Automatic differentiation" => add_prefix("lecture_08", [ + "Lecture" => "lecture.md", + "Lab" => "lab.md", + "Homework" => "hw.md", + ]), + "9: Intermediate representation" => add_prefix("lecture_09", [ + "Lecture" => "lecture.md", + "Lab" => "lab.md", + ]), + "10: Parallel programming" => add_prefix("lecture_10", [ + "Lecture" => "lecture.md", + "Lab" => "lab.md", + "Homework" => "hw.md", + ]), + "11: GPU programming" => add_prefix("lecture_11", [ + "Lecture" => "lecture.md", + "Lab" => "lab.md", + ]), + "12: Ordinary Differential Equations" => add_prefix("lecture_12", [ + "Lecture" => "lecture.md", + "Lab" => "lab.md", + "Homework" => "hw.md", + ]), + ]), +] + +# documentation +organisation = "JuliaTeachingCTU" +repository = "Scientific-Programming-in-Julia" +repo = 
Remotes.GitHub(organisation, repository) + +makedocs(; + modules=[Scientific_Programming_in_Julia], + authors=organisation, + repo=repo, + sitename="Scientific Programming in Julia", + format=DocumenterVitepress.MarkdownVitepress(; + repo=Remotes.repourl(repo), + VITREPRESS_KWARGS..., + ), + pages=pages, + warnonly=true, + MAKEDOCS_KWARGS..., +) + +deploydocs(; + repo=repo, + target="build", + devbranch="main", + branch="gh-pages", + push_preview=true, +) diff --git a/docs_vitepress/package.json b/docs_vitepress/package.json new file mode 100644 index 00000000..ebd6f3d4 --- /dev/null +++ b/docs_vitepress/package.json @@ -0,0 +1,16 @@ +{ + "scripts": { + "docs:dev": "vitepress dev build/.documenter", + "docs:build": "vitepress build build/.documenter", + "docs:preview": "vitepress preview build/.documenter" + }, + "dependencies": { + "@shikijs/transformers": "^1.1.7", + "dependencies": "^0.0.1", + "markdown-it": "^14.1.0", + "markdown-it-footnote": "^4.0.0", + "markdown-it-mathjax3": "^4.3.2", + "vitepress": "^1.5.0", + "vitepress-plugin-tabs": "^0.5.0" + } +} diff --git a/docs_vitepress/run_live_preview.sh b/docs_vitepress/run_live_preview.sh new file mode 100755 index 00000000..9d7dac2f --- /dev/null +++ b/docs_vitepress/run_live_preview.sh @@ -0,0 +1,10 @@ +export VITREPRESS_LIVE_PREVIEW=true + +# install all +npm install vitepress dependencies + +# run julia server and build documentation +julia -e 'using LiveServer; servedocs(foldername=pwd())' --project="." & +julia -e 'using DocumenterVitepress: dev_docs; dev_docs("build", md_output_path="")' --project="." 
& + +wait \ No newline at end of file diff --git a/docs_vitepress/src/.vitepress/config.mts b/docs_vitepress/src/.vitepress/config.mts new file mode 100644 index 00000000..000937c3 --- /dev/null +++ b/docs_vitepress/src/.vitepress/config.mts @@ -0,0 +1,50 @@ +import { defineConfig } from 'vitepress' +import { tabsMarkdownPlugin } from 'vitepress-plugin-tabs' +import mathjax3 from "markdown-it-mathjax3"; +import footnote from "markdown-it-footnote"; + + +// https://vitepress.dev/reference/site-config +export default defineConfig({ + base: 'REPLACE_ME_DOCUMENTER_VITEPRESS',// TODO: replace this in makedocs! + title: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + description: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + lastUpdated: true, + cleanUrls: true, + outDir: 'REPLACE_ME_DOCUMENTER_VITEPRESS', // This is required for MarkdownVitepress to work correctly... + head: [['link', { rel: 'icon', href: 'REPLACE_ME_DOCUMENTER_VITEPRESS_FAVICON' }]], + ignoreDeadLinks: true, + + markdown: { + math: true, + config(md) { + md.use(tabsMarkdownPlugin), + md.use(mathjax3), + md.use(footnote) + }, + theme: { + light: "github-light", + dark: "github-dark" + } + }, + themeConfig: { + outline: 'deep', + logo: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + search: { + provider: 'local', + options: { + detailedView: true + } + }, + nav: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + sidebar: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + editLink: 'REPLACE_ME_DOCUMENTER_VITEPRESS', + socialLinks: [ + { icon: 'github', link: 'REPLACE_ME_DOCUMENTER_VITEPRESS' } + ], + footer: { + message: 'Made with DocumenterVitepress.jl
', + copyright: `© Copyright ${new Date().getUTCFullYear()}.` + } + }, +}) diff --git a/docs_vitepress/src/.vitepress/theme/index.ts b/docs_vitepress/src/.vitepress/theme/index.ts new file mode 100644 index 00000000..463b5d85 --- /dev/null +++ b/docs_vitepress/src/.vitepress/theme/index.ts @@ -0,0 +1,19 @@ +// .vitepress/theme/index.ts +import { h } from 'vue' +import type { Theme } from 'vitepress' +import DefaultTheme from 'vitepress/theme' + +import { enhanceAppWithTabs } from 'vitepress-plugin-tabs/client' +import './style.css' + +export default { + extends: DefaultTheme, + Layout() { + return h(DefaultTheme.Layout, null, { + // https://vitepress.dev/guide/extending-default-theme#layout-slots + }) + }, + enhanceApp({ app, router, siteData }) { + enhanceAppWithTabs(app) + } +} satisfies Theme \ No newline at end of file diff --git a/docs_vitepress/src/.vitepress/theme/style.css b/docs_vitepress/src/.vitepress/theme/style.css new file mode 100644 index 00000000..994f490e --- /dev/null +++ b/docs_vitepress/src/.vitepress/theme/style.css @@ -0,0 +1,273 @@ +/* Customize default theme styling by overriding CSS variables: +https://github.com/vuejs/vitepress/blob/main/src/client/theme-default/styles/vars.css + */ + +/* Layouts */ + +/* + :root { + --vp-layout-max-width: 1440px; +} */ + +.VPHero .clip { + white-space: pre; + max-width: 500px; +} + +/* Fonts */ + +@font-face { + font-family: JuliaMono-Regular; + src: url("https://cdn.jsdelivr.net/gh/cormullion/juliamono/webfonts/JuliaMono-Regular.woff2"); +} + +:root { + /* Typography */ + --vp-font-family-base: "Barlow", "Inter var experimental", "Inter var", + -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen, Ubuntu, + Cantarell, "Fira Sans", "Droid Sans", "Helvetica Neue", sans-serif; + + /* Code Snippet font */ + --vp-font-family-mono: JuliaMono-Regular, monospace; + +} + +/* +Disable contextual alternates (kind of like ligatures but different) in monospace, +which turns `/>` to an up arrow and `|>` (the 
Julia pipe symbol) to an up arrow as well. +This is pretty bad for Julia folks reading even though copy+paste retains the same text. +*/ +/* Target elements with class 'mono' */ +.mono-no-substitutions { + font-family: "JuliaMono-Light", monospace; + font-feature-settings: "calt" off; +} + +/* Alternatively, you can use the following if you prefer: */ +.mono-no-substitutions-alt { + font-family: "JuliaMono-Light", monospace; + font-variant-ligatures: none; +} + +/* If you want to apply this globally to all monospace text: */ +pre code { + font-family: "JuliaMono-Light", monospace; + font-feature-settings: "calt" off; +} + +/* Colors */ + +:root { + --julia-blue: #4063D8; + --julia-purple: #9558B2; + --julia-red: #CB3C33; + --julia-green: #389826; + + /* --vp-c-brand: #389826; + --vp-c-brand-light: #3dd027; + --vp-c-brand-lighter: #9499ff; + --vp-c-brand-lightest: #bcc0ff; + --vp-c-brand-dark: #535bf2; + --vp-c-brand-darker: #454ce1; + --vp-c-brand-dimm: #212425; */ + + --vp-c-brand: #074f73; + --vp-c-brand-light: #086b9d; + --vp-c-brand-lighter: #2795cc; + --vp-c-brand-lightest: #7dc5ea; + --vp-c-brand-dark: #535bf2; + --vp-c-brand-darker: #454ce1; + --vp-c-brand-dimm: #212425; +} + +/* Component: Button */ + +:root { + --vp-button-brand-border: var(--vp-c-brand-light); + --vp-button-brand-text: var(--vp-c-white); + --vp-button-brand-bg: var(--vp-c-brand); + --vp-button-brand-hover-border: var(--vp-c-brand-light); + --vp-button-brand-hover-text: var(--vp-c-white); + --vp-button-brand-hover-bg: var(--vp-c-brand-light); + --vp-button-brand-active-border: var(--vp-c-brand-light); + --vp-button-brand-active-text: var(--vp-c-white); + --vp-button-brand-active-bg: var(--vp-button-brand-bg); +} + +/* Component: Home */ + +:root { + --vp-home-hero-name-color: transparent; + --vp-home-hero-name-background: -webkit-linear-gradient(120deg, + #074f73 30%, + #389826); + + --vp-home-hero-image-background-image: linear-gradient(-45deg, + #ffffff, + #074f73 20%, + #ffffff); + 
--vp-home-hero-image-filter: blur(40px); +} + +@media (min-width: 640px) { + :root { + --vp-home-hero-image-filter: blur(56px); + } +} + +@media (min-width: 960px) { + :root { + --vp-home-hero-image-filter: blur(72px); + } +} + +/* Component: Custom Block */ + +:root.dark { + --vp-custom-block-tip-border: var(--vp-c-brand); + --vp-custom-block-tip-text: var(--vp-c-brand-lightest); + --vp-custom-block-tip-bg: var(--vp-c-brand-dimm); + + /* // Tweak the color palette for blacks and dark grays */ + --vp-c-black: hsl(220 20% 9%); + --vp-c-black-pure: hsl(220, 24%, 4%); + --vp-c-black-soft: hsl(220 16% 13%); + --vp-c-black-mute: hsl(220 14% 17%); + --vp-c-gray: hsl(220 8% 56%); + --vp-c-gray-dark-1: hsl(220 10% 39%); + --vp-c-gray-dark-2: hsl(220 12% 28%); + --vp-c-gray-dark-3: hsl(220 12% 23%); + --vp-c-gray-dark-4: hsl(220 14% 17%); + --vp-c-gray-dark-5: hsl(220 16% 13%); + + /* // Backgrounds */ + /* --vp-c-bg: hsl(240, 2%, 11%); */ + --vp-custom-block-info-bg: hsl(220 14% 17%); + /* --vp-c-gutter: hsl(220 20% 9%); + + --vp-c-bg-alt: hsl(220 20% 9%); + --vp-c-bg-soft: hsl(220 14% 17%); + --vp-c-bg-mute: hsl(220 12% 23%); + */ +} + +/* Component: Algolia */ + +.DocSearch { + --docsearch-primary-color: var(--vp-c-brand) !important; +} + +/* Component: MathJax */ + +mjx-container>svg { + display: block; + margin: auto; +} + +mjx-container { + padding: 0.5rem 0; +} + +mjx-container { + display: inline; + margin: auto 2px -2px; +} + +mjx-container>svg { + margin: auto; + display: inline-block; +} + +/** + * Colors links + * -------------------------------------------------------------------------- */ + +:root { + --vp-c-brand-1: #086b9d; + --vp-c-brand-2: #086b9d; + --vp-c-brand-3: #086b9d; + --vp-c-sponsor: #086b9d; + --vitest-c-sponsor-hover: #c13071; +} + +.dark { + --vp-c-brand-1: #2795cc; + --vp-c-brand-2: #2795cc; + --vp-c-brand-3: #2795cc; + --vp-c-sponsor: #2795cc; + --vitest-c-sponsor-hover: #CB3C33; +} + +/** + * Change images from light to dark theme + * 
-------------------------------------------------------------------------- */ + +:root:not(.dark) .dark-only { + display: none; +} + +:root:is(.dark) .light-only { + display: none; +} + +/* https://bddxg.top/article/note/vitepress优化/一些细节上的优化.html#文档页面调整-加宽 */ + +.VPDoc.has-aside .content-container { + max-width: 100% !important; +} + +.aside { + max-width: 200px !important; + padding-left: 0 !important; +} + +.VPDoc { + padding-top: 15px !important; + padding-left: 5px !important; + +} + +/* This one does the right menu */ + +.VPDocOutlineItem li { + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; + max-width: 200px; +} + +.VPNavBar .title { + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; +} + +@media (max-width: 960px) { + .VPDoc { + padding-left: 25px !important; + } +} + +/* This one does the left menu */ + +/* .VPSidebarItem .VPLink p { + text-overflow: ellipsis; + overflow: hidden; + white-space: nowrap; + max-width: 200px; +} */ + + +/* Component: Docstring Custom Block */ + +.jldocstring.custom-block { + border: 1px solid var(--vp-c-gray-2); + color: var(--vp-c-text-1) +} + +.jldocstring.custom-block summary { + font-weight: 700; + cursor: pointer; + user-select: none; + margin: 0 0 8px; +} \ No newline at end of file diff --git a/docs_vitepress/src/assets/dual.png b/docs_vitepress/src/assets/dual.png new file mode 100644 index 00000000..708f43a1 Binary files /dev/null and b/docs_vitepress/src/assets/dual.png differ diff --git a/docs_vitepress/src/assets/favicon.ico b/docs_vitepress/src/assets/favicon.ico new file mode 100644 index 00000000..535e728b Binary files /dev/null and b/docs_vitepress/src/assets/favicon.ico differ diff --git a/docs_vitepress/src/assets/julia-gpu-logo.png b/docs_vitepress/src/assets/julia-gpu-logo.png new file mode 100644 index 00000000..c5610d09 Binary files /dev/null and b/docs_vitepress/src/assets/julia-gpu-logo.png differ diff --git a/docs_vitepress/src/assets/julia-set.png 
b/docs_vitepress/src/assets/julia-set.png new file mode 100644 index 00000000..b5b47fc5 Binary files /dev/null and b/docs_vitepress/src/assets/julia-set.png differ diff --git a/docs_vitepress/src/assets/logo-dark.svg b/docs_vitepress/src/assets/logo-dark.svg new file mode 100644 index 00000000..eb048ce7 --- /dev/null +++ b/docs_vitepress/src/assets/logo-dark.svg @@ -0,0 +1,142 @@ + + + +image/svg+xml + + + + \ No newline at end of file diff --git a/docs_vitepress/src/assets/logo.svg b/docs_vitepress/src/assets/logo.svg new file mode 100644 index 00000000..8af6b680 --- /dev/null +++ b/docs_vitepress/src/assets/logo.svg @@ -0,0 +1,143 @@ + + + +image/svg+xml + + + + \ No newline at end of file diff --git a/docs_vitepress/src/index.md b/docs_vitepress/src/index.md new file mode 100644 index 00000000..7a70402b --- /dev/null +++ b/docs_vitepress/src/index.md @@ -0,0 +1,137 @@ +```@raw html +--- +# https://vitepress.dev/reference/default-theme-home-page +layout: home + +hero: + name: "" + text: "Scientific Programming in Julia" + image: + light: assets/logo.svg + dark: assets/logo-dark.svg + outline: true + actions: + - theme: brand + text: Lectures + link: /lectures/outline + - theme: alt + text: Tutorials + link: /tutorials/installation + - theme: alt + text: Projects + link: /projects/requirements +--- +``` + +Scientific Programming requires the highest performance but we also want to +write very high level code to enable rapid prototyping and avoid error prone, +low level implementations. + +The [Julia](https://docs.julialang.org/en/v1/) programming language is designed +with exactly those requirements of scientific computing in mind. In this +course we will show you how to make use of the tools and advantages that +*jit-compiled* Julia provides over *dynamic*, high-level languages like Python +or lower level languages like C++. + +![](assets/dual.png) +```html +# TODO: caption for images not working +
+ +
+ Wield the power of abstraction. + Example: The essence of forward mode automatic differentiation. +
+
+``` + +Before joining the course, consider reading the following two blog posts to figure out if Julia is +a language in which you want to invest your time. +- What is [*great*](https://viralinstruction.com/posts/goodjulia/) about Julia. +- What is [*bad*](https://viralinstruction.com/posts/badjulia/) about Julia. + + +## What will you learn? + +First and foremost you will learn how to _**think julia**_ - meaning how write +fast, extensible, reusable, and easy-to-read code using things like *optional +typing*, *multiple dispatch*, and functional programming concepts. The later +part of the course will teach you how to use more advanced concepts like +*language introspection*, *metaprogramming*, and *symbolic computing*. +Amonst others you will _**implement your own automatic differentiation**_ (the +backbone of modern machine learning) package based on these advanced techniques +that can transform intermediate representations of Julia code. + + +## Organization + +This course webpage contains all information about the course that you need, +including lecture notes, lab instructions, and homeworks. The official format +of the course is 2+2 (2h lectures/2h labs per week) for 4 credits. + +The official course code is: **B0M36SPJ** and the timetable for the winter semester +2022 can be found [here](https://fel.cvut.cz/cz/education/rozvrhy-ng.B221/public/html/predmety/69/85/p6985906.html). + +The course will be graded based on points from your homework (**max. 20 points**) +and points from a [final project](@ref final_project) (**max. 30 points**). + +Below is a table that shows which lectures have homeworks (and their points). + +| Homework | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | +| :-- | :-- | :-- | :-- | :-- | :-- | :-- | :-- | :-- | :-- | :-- | :-- | :-- | :-- | +| Points | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | - | 2 | - | 2 | - | + +Hint: The **first few homeworks are easier**. Use them to fill up your points. 
+ + +## [Final project](@id final_project) + +The final project will be individually agreed on for each student. Ideally you +can use this project to solve a problem you have e.g. in your thesis, but don't +worry - if you cannot come up with your own project idea, we will suggest one to +you. More info and project suggestions can be found [here](@ref projects). Check +out the [list of projects](@ref former_projects) from the past to get a feeling for +what we expect. + + +## Grading + +Your points from the homeworks and the final project are summed and graded by +the standard grading scale below. + +| Grade | A | B | C | D | E | F | +| :-- | :-- | :-- | :-- | :-- | :-- | :-- | +| Points | 45-50 | 40-44 | 35-39 | 30-34 | 25-29 | 0-25 | + + +## [Teachers](@id emails) + +| -- | E-mail | Room | Role | +| :-- | :-- | :-- | :-- | +| Tomáš Pevný | [pevnak@protonmail.ch](mailto:pevnak@protonmail.ch) | KN:E-406 | Lecturer | +| Vašek Šmídl | [smidlva1@fjfi.cvut.cz](mailto:smidlva1@fjfi.cvut.cz) | KN:E-333 | Lecturer | +| Matěj Zorek | [zorekmat@fel.cvut.cz](mailto:zorekmat@fel.cvut.cz) | KN:E-333 | Lab Instructor | +| Niklas Heim | [heimnikl@fel.cvut.cz](mailto:heimnikl@fel.cvut.cz) | KN:E-333 | Lab Instructor | + + +## Prerequisites + +There are no hard requirements to take the course. We go through the basics of Julia, but we do it rather quickly, +so some familiarity with the language is an advantage. +If you are looking for an in-depth course on the basics of the language, we recommend checking out [Julia for Optimization and Learning](https://github.com/JuliaTeachingCTU/Julia-for-Optimization-and-Learning) before enrolling in this course. The [Functional Programming](https://cw.fel.cvut.cz/b202/courses/fup/start) course also contains +some helpful concepts for this course. And knowledge about computer hardware, namely basics of how a CPU works, how it interacts with memory through caches, and basics of multi-threading certainly helps. 
+ +## References + +- [Official documentation](https://docs.julialang.org/en/v1/) +- [Modern Julia Workflows](https://modernjuliaworkflows.github.io) +- [Workflow tips, and what is new in v1.9](https://www.youtube.com/watch?v=qM9NtiYlXck) +- [Zero2Hero Julia Workhop](https://github.com/Datseris/Zero2Hero-JuliaWorkshop) +- [Think Julia: How to Think Like a Computer Scientist](https://benlauwens.github.io/ThinkJulia.jl/latest/book.html#chap01) +- [From Zero to Julia!](https://techytok.com/from-zero-to-julia/) +- [WikiBooks](https://en.wikibooks.org/wiki/Introducing_Julia) +- [Justin Krumbiel's](https://jkrumbiegel.com/pages/2022-08-26-pkg-introduction/) excellent introduction to the package manager. +- [juliadatascience.io](https://juliadatascience.io) contains an excellent introduction to plotting with Makie. +- The art of [multiple dispatch](https://scientificcoder.com/the-art-of-multiple-dispatch) +- MIT Course: [Julia Computation](https://github.com/mitmath/JuliaComputation) +- Tim Holy's [Advanced Scientific Computing](https://github.com/timholy/AdvancedScientificComputing) \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_01/10_ode.svg b/docs_vitepress/src/lectures/lecture_01/10_ode.svg new file mode 100644 index 00000000..57d0704d --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_01/10_ode.svg @@ -0,0 +1,326 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_01/10_ode_m.svg b/docs_vitepress/src/lectures/lecture_01/10_ode_m.svg new file mode 100644 index 00000000..3a926da7 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_01/10_ode_m.svg @@ -0,0 +1,372 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_01/basics.md b/docs_vitepress/src/lectures/lecture_01/basics.md new file mode 100644 index 00000000..126ba986 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_01/basics.md @@ -0,0 +1,129 @@ +# Syntax + +## Elementary syntax: Matlab heritage + +Very much like matlab: + +- indexing from 1 +- array as first-class ```A=[1 2 3]``` + + +::: tip Useful links + +- Cheat sheet: https://cheatsheets.quantecon.org/ +- Introduction: https://juliadocs.github.io/Julia-Cheat-Sheet/ + +::: + + +### Arrays are first-class citizens + +Many design choices were motivated considering matrix arguments: + +- ``` x *= 2``` is implemented as ```x = x*2``` causing new allocation (vectors). + +The reason is consistency with matrix operations: ```A *= B``` works as ```A = A*B```. + +### Broadcasting operator + +Julia generalizes matlabs ```.+``` operation to general use for any function. + +```julia +a = [1 2 3] +sin.(a) +f(x)=x^2+3x+8 +f.(a) +``` + +Solves the problem of inplace multiplication + +- ``` x .*= 2``` + + +## Functional roots of Julia + +Function is a first-class citizen. + +Repetition of functional programming: + +```julia +function mymap(f::Function,a::AbstractArray) + b = similar(a) + for i in eachindex(a) + b[i]=f(a[i]) + end + b +end +``` + +Allows for anonymous functions: + +```julia +mymap(x->x^2+2,[1.0,2.0]) +``` + +Function properties: + +- Arguments are passed by reference (change of mutable inputs inside the function is visible outside) +- Convention: function changing inputs have a name ending by "!" symbol +- return value + - the last line of the function declaration, + - ```return``` keyword +- zero cost abstraction + +### Different style of writing code + +Definitions of multiple small functions and their composition + +```julia +fsum(x) = x +fsum(x,p...) 
= x+fsum(p[1],p[2:end]...) +``` + +a single method may not be sufficient to understand the full algorithm. In a procedural language, you may write: + +```matlab +function out=fsum(x,varargin) + if nargin==2 # TODO: better treatment + out=x + else + out = fsum(varargin{1},varargin{2:end}) + end +``` + +This style requires building an intuition for function composition. + +Dispatch is easier to optimize by the compiler. + + +## Operators are functions + +| operator | function name | +| --- | --- | +| [A B C ...] | hcat | +| [A; B; C; ...] | vcat | +| [A B; C D; ...] | hvcat | +| A' | adjoint | +| A[i] | getindex | +| A[i] = x | setindex! | +| A.n | getproperty | +| A.n = x | setproperty! | + +```julia +struct Foo end + +Base.getproperty(a::Foo, x::Symbol) = x == :a ? 5 : error("does not have property $(x)") +``` + +Can be redefined and overloaded for different input types. The ```getproperty``` method can define access to the memory structure. + +## Broadcasting revisited + +The ```a.+b``` syntax is syntactic sugar for ```broadcast(+,a,b)```. + +The special meaning of the dots is that they will be fused into a single call: + +- ```f.(g.(x .+ 1))``` is treated by Julia as ```broadcast(x -> f(g(x + 1)), x)```. +- An assignment ```y .= f.(g.(x .+ 1))``` is treated as in-place operation ```broadcast!(x -> f(g(x + 1)), y, x)```. + +The same logic works for lists, tuples, etc. 
diff --git a/docs_vitepress/src/lectures/lecture_01/bench_incremental.svg b/docs_vitepress/src/lectures/lecture_01/bench_incremental.svg new file mode 100644 index 00000000..73556094 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_01/bench_incremental.svg @@ -0,0 +1,413 @@ + + + + +Gnuplot +Produced by GNUPLOT 4.6 patchlevel 5 + + + + + + + + + + + + + + + + + + + + + + + + + + 0 + + + 1000 + + + 2000 + + + 3000 + + + 4000 + + + 5000 + + + 6000 + + + 7000 + + + 8000 + + + 9000 + + + 0 + + + 200 + + + 400 + + + 600 + + + 800 + + + 1000 + + + 1200 + + + 1400 + + + 1600 + + + 1800 + + + 2000 + + + MFLOPS + + + Matrix dimensions N=M=K + + + Compute C + A*B + + + gnuplot_plot_1 + + + Netlib RefBLAS + + + + + + + + + + + + + + + gnuplot_plot_2 + + + demo-pure-c + + + + + + + + + + + + + + + gnuplot_plot_3 + + + demo-naive-sse-with-intrinsics + + + + + + + + + + + + + + + gnuplot_plot_4 + + + demo-naive-sse-with-intrinsics-unrolled + + + + + + + + + + + + + + + gnuplot_plot_5 + + + demo-sse-intrinsics + + + + + + + + + + + + + + + gnuplot_plot_6 + + + demo-sse-intrinsics-v2 + + + + + + + + + + + + + + + gnuplot_plot_7 + + + demo-sse-asm + + + + + + + + + + + + + + + gnuplot_plot_8 + + + demo-sse-asm-unrolled + + + + + + + + + + + + + + + gnuplot_plot_9 + + + demo-sse-asm-unrolled-v2 + + + + + + + + + + + + + + + gnuplot_plot_10 + + + demo-sse-asm-unrolled-v3 + + + + + + + + + + + + + + + gnuplot_plot_11 + + + demo-sse-all-asm + + + + + + + + + + + + + + + gnuplot_plot_12 + + + "demo-sse-all-asm-try-prefetching" using 4:13 + + + + + + + + + + + + + + + + + + + + + + + + + gnuplot_plot_13 + + + "demo-sse-all-asm-try-prefetching-v2" using 4:13 + + + + + + + + + + + + + + + + + + + + + + + + + gnuplot_plot_14 + + + "demo-sse-all-asm-with-prefetching" using 4:13 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_01/benchmarks.svg b/docs_vitepress/src/lectures/lecture_01/benchmarks.svg new file mode 100644 index 
00000000..7a1ff5da --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_01/benchmarks.svg @@ -0,0 +1,312 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_01/blas_benchmark.png b/docs_vitepress/src/lectures/lecture_01/blas_benchmark.png new file mode 100644 index 00000000..07f4773d Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_01/blas_benchmark.png differ diff --git a/docs_vitepress/src/lectures/lecture_01/carton_programmer.jpg b/docs_vitepress/src/lectures/lecture_01/carton_programmer.jpg new file mode 100644 index 00000000..9e81726a Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_01/carton_programmer.jpg differ diff --git a/docs_vitepress/src/lectures/lecture_01/cartoon_comp.jpg b/docs_vitepress/src/lectures/lecture_01/cartoon_comp.jpg new file mode 100644 index 00000000..3179456d Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_01/cartoon_comp.jpg differ diff --git a/docs_vitepress/src/lectures/lecture_01/demo.md b/docs_vitepress/src/lectures/lecture_01/demo.md new file mode 100644 index 00000000..30361524 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_01/demo.md @@ -0,0 +1,73 @@ +# Extensibility of the language + +## DifferentialEquations + +A package for solving differential equations, similar to ```odesolve``` in Matlab. 
+ +Example: + +```julia +using DifferentialEquations, Plots + +function lotka_volterra(du,u,p,t) + x, y = u + α, β, δ, γ = p + du[1] = dx = α*x - β*x*y + du[2] = dy = -δ*y + γ*x*y +end + +u0 = [1.0,1.0] +tspan = (0.0,10.0) +p = [1.5,1.0,3.0,1.0] +prob = ODEProblem(lotka_volterra,u0,tspan,p) + +sol = solve(prob) +plot(sol) +``` + +![](10_ode.svg) + +## Measurements + +A package defining "numbers with precision" and complete algebra on these numbers: + +```julia +using Measurements + +a = 4.5 ± 0.1 +b = 3.8 ± 0.4 + +2a + b +sin(a)/cos(a) - tan(a) +``` + +It also defines recipes for Plots.jl describing how to plot such numbers. + +## Starting ODE from an interval + +```julia +u0 = [1.0±0.1,1.0±0.01] + +prob = ODEProblem(lotka_volterra,u0,tspan,p) +sol = solve(prob) +plot(sol,denseplot=false) +``` + +![](10_ode_m.svg) + +- all algebraic operations are defined, +- passes all grid refinement techniques +- plot uses the correct plotting for intervals + +## Integration with other toolkits + +**Flux:** toolkit for modelling Neural Networks. A neural network is a function. + +- integration with Measurements, +- integration with ODE (think of NN as part of the ODE) + +**Turing:** Probabilistic modelling toolkit + +- integration with Flux (NN) +- integration with ODE +- using arbitrary bijective transformations, Bijectors.jl diff --git a/docs_vitepress/src/lectures/lecture_01/demo_inline.jl b/docs_vitepress/src/lectures/lecture_01/demo_inline.jl new file mode 100644 index 00000000..a5297d66 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_01/demo_inline.jl @@ -0,0 +1,4 @@ +fsum(x::Int,p...)=x+fsum(p[1],p[2:end]...) 
+fsum(x::Int) = x + + diff --git a/docs_vitepress/src/lectures/lecture_01/hw.md b/docs_vitepress/src/lectures/lecture_01/hw.md new file mode 100644 index 00000000..d6ad2da1 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_01/hw.md @@ -0,0 +1,112 @@ +# Homework 1: + +## Extending `polynomial` the other way + +::: warning Homework + +Extend the original polynomial function to the case where `x` is a square matrix. Create a function called `circlemat`, that returns `nxn` matrix $$A(n)$$ with the following elements +```math +\left[A(n)\right]_{ij} = +\begin{cases} + 1 &\text{if } (i = j-1 \land j > 1) \lor (i = n \land j=1) \\ + 1 &\text{if } (i = j+1 \land j < n) \lor (i = 1 \land j=n) \\ + 0 & \text{ otherwise} +\end{cases} +``` +and evaluate the polynomial +```math +f(A) = I + A + A^2 + A^3, +``` +at point $$A = A(10)$$. + +**HINTS** for matrix definition: +You can try one of these options: +- create matrix with all zeros with `zeros(n,n)`, use two nested for loops going in ranges `1:n` and if condition with logical or `||`, and `&&` +- employ array comprehension with nested loops `[expression for i in 1:n, j in 1:n]` and ternary operator `condition ? true branch : false` + +**HINTS** for `polynomial` extension: +- extend the original example (one with for-loop) to initialize the `accumulator` variable with matrix of proper size (use `size` function to get the dimension), using argument typing for `x` is preferred to distinguish individual implementations `<: AbstractMatrix` +or +- test later defined `polynomial` methods, that may work out of the box + +::: + +::: details Show solution + +Nothing to see here. + +::: + +## How to submit? + +Put all the code for the exercise above in a file called `hw.jl` and upload it to +[BRUTE](https://cw.felk.cvut.cz/brute/). +If you have any questions, write an email to one of the [lab instructors](@ref emails) of the course. 
+ +## Voluntary + +::: warning Exercise + +Install `GraphRecipes` and `Plots` packages into the environment defined during the lecture and figure out how to plot the graph defined by adjacency matrix `A` from the homework. + +**HINTS**: +- There is a help command inside the pkg mode of the REPL. Type `? add` to find out how to install a package. Note that both pkgs are registered. +- Follow a guide in the `Plots` pkg's documentation, which is accessible through the `docs` icon on top of the README in the GitHub [repository](https://github.com/JuliaPlots/Plots.jl). Direct [link](http://docs.juliaplots.org/latest/graphrecipes/introduction/#GraphRecipes). + +::: + +::: details Show solution + +Activate the environment in pkg mode, if it is not currently active. +```julia +pkg> activate . +``` +Installing pkgs is achieved using the `add` command. Running `] ? add` returns a short piece of documentation for this command: +```julia +pkg> ? add +[...] + Examples + + pkg> add Example # most commonly used for registered pkgs (installs usually the latest release) + pkg> add Example@0.5 # install with some specific version (realized through git tags) + pkg> add Example#master # install from master branch directly + pkg> add Example#c37b675 # install from specific git commit + pkg> add https://github.com/JuliaLang/Example.jl#master # install from specific remote repository (when pkg is not registered) + pkg> add git@github.com:JuliaLang/Example.jl.git # same as above but using the ssh protocol + pkg> add Example=7876af07-990d-54b4-ab0e-23690620f79a # when there are multiple pkgs with the same name +``` + +As both `Plots` and `GraphRecipes` are registered and we don't have any version requirements, we will use the first option. +```julia +pkg> add Plots +pkg> add GraphRecipes +``` +This process downloads the pkgs and triggers some build steps, if for example some binary dependencies are needed. 
The process duration depends on the "freshness" of Julia installation and the size of each pkg. With `Plots` being quite dependency heavy, expect few minutes. After the installation is complete we can check the updated environment with the `status` command. +```julia +pkg> status +``` + +The plotting itself as easy as calling the `graphplot` function on our adjacency matrix. + +```@repl lab01_base +using GraphRecipes, Plots +A = [ # hide + 0 1 0 0 0 0 0 0 0 1; # hide + 1 0 1 0 0 0 0 0 0 0; # hide + 0 1 0 1 0 0 0 0 0 0; # hide + 0 0 1 0 1 0 0 0 0 0; # hide + 0 0 0 1 0 1 0 0 0 0; # hide + 0 0 0 0 1 0 1 0 0 0; # hide + 0 0 0 0 0 1 0 1 0 0; # hide + 0 0 0 0 0 0 1 0 1 0; # hide + 0 0 0 0 0 0 0 1 0 1; # hide + 1 0 0 0 0 0 0 0 1 0 # hide +] # hide +graphplot(A) +``` + +```@example lab01_base +graphplot(A) #hide +``` + +::: diff --git a/docs_vitepress/src/lectures/lecture_01/julia-compilation.svg b/docs_vitepress/src/lectures/lecture_01/julia-compilation.svg new file mode 100644 index 00000000..792cc710 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_01/julia-compilation.svg @@ -0,0 +1,1654 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_01/julia-scope.pdf b/docs_vitepress/src/lectures/lecture_01/julia-scope.pdf new file mode 100644 index 00000000..8264f48b Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_01/julia-scope.pdf differ diff --git a/docs_vitepress/src/lectures/lecture_01/julia-scope.svg b/docs_vitepress/src/lectures/lecture_01/julia-scope.svg new file mode 100644 index 00000000..15b06ee1 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_01/julia-scope.svg @@ -0,0 +1,696 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + computerfriendly + programmerfriendly + + C + gcc + llvm + clang + MATLAB + + mex + (c)python + + bindings + C++ + numba + Julia + cython + + + ccall + pycall + statictyping + dynamictyping + + + ASM + + OOPtemplates + + + DLL + + diff --git a/docs_vitepress/src/lectures/lecture_01/lab.md b/docs_vitepress/src/lectures/lecture_01/lab.md new file mode 100644 index 00000000..a9a971fe --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_01/lab.md @@ -0,0 +1,781 @@ +# Lab 01: Introduction to Julia + +This lab should get everyone up to speed in the basics of Julia's installation, syntax and basic +coding. For more detailed introduction you can check out Lectures 1-3 of the bachelor +[course](https://juliateachingctu.github.io/Julia-for-Optimization-and-Learning/stable/). + +## Testing Julia installation (custom setup) + +In order to proceed further let's run a simple [script](https://github.com/JuliaTeachingCTU/Scientific-Programming-in-Julia/blob/master/docs/src/lecture_01/test_setup.jl) to see, that the setup described in chapter [Installation](@ref install) is working properly. 
There are some quality-of-life improvements over the long-term support versions of Julia, and thus throughout this course we will use the latest stable release of Julia 1.6.x.
- `VSCode` - when the Julia extension is installed and a `.jl` file is opened, `Ctrl/Cmd+Enter` will spawn a Julia REPL
+- `Sublime Text` - `Ctrl/Cmd+Enter` with the `Send Code` pkg (works well with a Linux terminal or tmux, support for Windows is poor)
+- `Vim` - there is a Julia language [plugin](https://github.com/JuliaEditorSupport/julia-vim), which can be combined with [vimcmdline](https://github.com/jalvesaq/vimcmdline) to gain similar functionality
As you can see, the string that is being displayed contains information about the contents of a variable along with its type; in this case this is a `Vector/Array` of `Int` types.
+ +::: + +::: details Show solution + +```@repl lab01_base +x = 3.0 +accumulator = 0 +typeof(x), typeof(accumulator) +``` + +::: + +### For cycles and ranges + +Moving further into the polynomial function we encounter the definition of a for cycle, with the de facto standard syntax + +```julia +for iteration_variable in iterator + # do something +end +``` + +As an example of iterator we have used an instance of a range type + +```@repl lab01_base +r = length(a):-1:1 +typeof(r) +``` + +As opposed to Python, ranges in Julia are *inclusive*, i.e. they contain number from start to end - in this case running from `4` to `1` with negative step `-1`, thus counting down. This can be checked with the `collect` and/or `length` functions. + +```@repl lab01_base +collect(r) +length(r) +``` + +::: warning Exercise + +Create variable `c` containing an array of even numbers from `2` to `42`. Furthermore create variable `d` that is different from `c` only at the 7th position, which will contain `13`. + +**HINT**: Use `collect` function for creation of `c` and `copy` for making a copy of `c`. + +::: + +::: details Show solution + +```@repl lab01_base +c = collect(2:2:42) +d = copy(c) +d[7] = 13 +d +``` + +::: + +### Functions and operators + +Let us now move from the function body to the function definition itself. From the picture at the top of the page, we can infer the general syntax for function definition: + +```julia +function function_name(arguments) + # do stuff with arguments and define output value `something` + return something +end +``` + +The return keyword can be omitted, if the last line being evaluated contains the result. + +By creating the function `polynomial` we have defined a variable `polynomial`, that from now on always refers to a function and cannot be reassigned to a different type, like for example `Int`. 
+ +```@repl lab01_base +polynomial = 42 +``` + +This is caused by the fact that each function defines essentially a new type, the same like `Int ~ Int64` or `Vector{Int}`. + +```@repl lab01_base +typeof(polynomial) +``` + +You can check that it is a subtype of the `Function` abstract type, with the subtyping operator `<:` + +```@repl lab01_base +typeof(polynomial) <: Function +``` + +These concepts will be expanded further in the [type system lecture](@ref type_system), however for now note that this construction is quite useful for example if we wanted to create derivative rules for our function `derivativeof(::typeof(polynomial), ...)`. + +Looking at mathematical operators `+`, `*`, we can see that in Julia they are also standalone functions. + +```@repl lab01_base ++ +* +``` + +The main difference from our `polynomial` function is that there are multiple methods, for each of these functions. Each one of the methods corresponds to a specific combination of arguments, for which the function can be specialized to using *multiple dispatch*. You can see the list by calling a `methods` function: + +```julia +julia> methods(+) +# 190 methods for generic function "+": +[1] +(x::T, y::T) where T<:Union{Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8} in Base at + int.jl:87 +[2] +(c::Union{UInt16, UInt32, UInt64, UInt8}, x::BigInt) in Base.GMP at gmp.jl:528 +[3] +(c::Union{Int16, Int32, Int64, Int8}, x::BigInt) in Base.GMP at gmp.jl:534 +... +``` + +One other notable difference is that these functions allow using both infix and postfix notation `a + b` and `+(a,b)`, which is a specialty of elementary functions such as arithmetic operators or set operation such as `∩, ∪, ∈`. + +The functionality of `methods` is complemented with the reverse lookup `methodswith`, which for a given type returns a list of methods that can be called with it as an argument. 
+ +```julia +julia> methodswith(Int) +[1] +(x::T, y::T) where T<:Union{Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8} in Base at int.jl:87 +[2] +(c::Union{Int16, Int32, Int64, Int8}, x::BigInt) in Base.GMP at gmp.jl:534 +[3] +(c::Union{Int16, Int32, Int64, Int8}, x::BigFloat) in Base.MPFR at mpfr.jl:384 +[4] +(x::BigFloat, c::Union{Int16, Int32, Int64, Int8}) in Base.MPFR at mpfr.jl:379 +[5] +(x::BigInt, c::Union{Int16, Int32, Int64, Int8}) in Base.GMP at gmp.jl:533 +... +``` + +::: warning Exercise + +Define function called `addone` with one argument, that adds `1` to the argument. + +::: + +::: details Show solution + +```@repl lab01_base +function addone(x) + x + 1 +end + +addone(1) == 2 +``` + +::: + +### Calling for help + +In order to better understand some keywords we have encountered so far, we can ask for help in the Julia's REPL itself with the built-in help terminal. Accessing help terminal can be achieved by writing `?` with a query keyword after. This searches documentation of all the available source code to find the corresponding keyword. The simplest way to create documentation, that can be accessed in this way, is using so called `docstring`s, which are multiline strings written above function or type definition. + +```julia +""" + polynomial(a, x) + +Returns value of a polynomial with coefficients `a` at point `x`. +""" +function polynomial(a, x) + # function body +end +``` + +More on this in lecture 4 about pkg development. + +::: warning Exercise + +Lookup `docstring` for the basic functions that we have introduced in the previous exercises: `typeof`, `eltype`, `length`, `collect`, `copy`, `methods` and `methodswith`. + +**BONUS**: Try it with others, for example with the subtyping operator `<:`. + +::: + +::: details Show solution + +Example docstring for `typeof` function. + +```julia + typeof(x) + + Get the concrete type of x. 
In this case we allow you to consult your solution with the expandable solution below to find out more information about a particular example.
The last example shows the so-called array comprehension syntax, where we define an array of known length using a for loop iteration. The resulting array/vector has integer elements, however even mixed types are possible, yielding `Any` if there isn't any other common supertype to `promote` every entry into.
If we include the function `polynomial` from some file `poly.jl` using `include("poly.jl")`, we will see that the location changes from `REPL[X]:10` to the actual file name. + +By swapping square brackets for round in the array comprehension `ac` above, we have defined so called generator/iterator, which as opposed to original variable `ac` does not allocate an array, only the structure that produces it. + +```@example lab01_base +ag = (2i^2 + 1 for i in -2:1) +typeof(ag), eltype(ag) +``` + +You may notice that the element type in this case is `Any`, which means that a function using this generator as an argument cannot specialize based on the type and has to infer it every time an element is generated/returned. We will touch on how this affects performance in one of the later lectures. + +```@repl lab01_base +polynomial(ag, x) +``` + +The problem that we face during evaluation is that generator type is missing the `getindex` operation, as they are made for situations where the size of the collection may be unknown and the only way of obtaining particular elements is through sequential iteration. Generators can be useful for example when creating batches of data for a machine learning training. We can "fix" the situation using `collect` function, mentioned earlier, however that again allocates an array. + +## Extending/limiting the polynomial example + +Following up on the polynomial example, let's us expand it a little further in order to facilitate the arguments, that have been throwing exceptions. The first direction, which we will move forward to, is providing the user with more detailed error message when an incorrect type of coefficients has been provided. + +::: warning Exercise + +Design an `if-else` condition such that the array of `Char` example throws an error with custom string message, telling the user what went wrong and printing the incorrect input alongside it. 
Confirm that we have not broken the functionality of other examples from previous exercise. + +**HINTS:** +- Throw the `ArgumentError(msg)` with `throw` function and string message `msg`. More details in help mode `?` or at the end of this [document](@ref lab_errors). +- Strings are defined like this `s = "Hello!"` +- Use string interpolation to create the error message. It allows injecting an expression into a string with the `$` syntax `b = 1; s = "Hellow Number $(b)"` +- Compare `eltype` of the coefficients with `Char` type. +- The syntax for `if-else`: + +```julia +if condition + println("true") # true branch code +else + println("false") # false branch code +end +``` + +- Not equal condition can be written as `a != b`. +- Throwing an exception automatically returns from the function. Use return inside one of the branches to return the correct value. + +::: + +::: details Show solution + +The simplest way is to wrap the whole function inside an `if-else` condition and returning only when the input is "correct" (it will still fail in some cases). + +```@repl lab01_base +function polynomial(a, x) + if eltype(a) != Char + accumulator = 0 + for i in length(a):-1:1 + accumulator += x^(i-1) * a[i] # ! 1-based indexing for arrays + end + return accumulator + else + throw(ArgumentError("Invalid coefficients $(a) of type Char!")) + end +end +nothing #hide +``` + +Now this should show our predefined error message. + +```@repl lab01_base +polynomial(ach, x) +``` + +Testing on other examples should pass without errors and give the same output as before. + +```@repl lab01_base +polynomial(a, x) +polynomial(af, x) +polynomial(at, x) +polynomial(ant, x) +polynomial(a2d, x) +polynomial(ac, x) +``` + +::: + +The second direction concerns the limitation to index-able structures, which the generator example is not. For this we will have to rewrite the whole loop in a more functional programming approach using `map`, anonymous function and other concepts. 
+ +::: warning Exercise + +Rewrite the following code inside our original `polynomial` function with `map`, `enumerate` and anonymous function. + +```julia +accumulator = 0 +for i in length(a):-1:1 + accumulator += x^(i-1) * a[i] # ! 1-based indexing for arrays +end +``` + +***Anonymous functions reminder*** + +```@repl lab01_anonymous +x -> x + 1 # unless the reference is stored it cannot be called +plusone = x -> x + 1 # the reference can be stored inside a variable +plusone(x) # calling with the same syntax +``` + +**HINTS:** +- Use `enumerate` to obtain iterator over `a` that returns a tuple of `ia = (i, aᵢ)`. With Julia 1-based indexing `i` starts also from 1 and goes up to `length(a)`. +- Pass this into a `map` with either in-place or predefined anonymous function that does the operation of `x^(i-1) * aᵢ`. +- Use `sum` to collect the resulting array into `accumulator` variable or directly into the `return` command. + +**BONUS:** +Can you figure out how to use the `mapreduce` function here? See entry in the help mode `?`. + +::: + +::: details Show solution + +Ordered from the longest to the shortest, here are three examples with the same functionality (and there are definitely many more). +Using the `map(iterable) do itervar ... end` syntax, that creates anonymous function from the block of code. + +```@example lab01_base +function polynomial(a, x) + powers = map(enumerate(a)) do (i, aᵢ) + x^(i-1) * aᵢ + end + accumulator = sum(powers) + return accumulator +end +nothing #hide +``` + +Using the default syntax for `map` and storing the anonymous into a variable + +```@example lab01_base +function polynomial(a, x) + polypow(i,aᵢ) = x^(i-1) * aᵢ + powers = map(polypow, enumerate(a)) + return sum(powers) +end +nothing #hide +``` + +As the function `polypow` is used only once, there is no need to assign it to a local variable. +Note the sightly awkward additional parenthesis in the argument of the lambda function. 
+ +```@example lab01_base +function polynomial(a, x) + powers = map(((i,aᵢ),) -> x^(i-1) * aᵢ, enumerate(a)) + sum(powers) +end +nothing #hide +``` + +Checking the behavior on all the inputs. + +```@repl lab01_base +polynomial(a, x) +polynomial(af, x) +polynomial(at, x) +polynomial(ant, x) +polynomial(a2d, x) +polynomial(ach, x) +polynomial(ac, x) +polynomial(ag, x) +``` + +**BONUS:** You may have noticed that in the example above, the `powers` variable is allocating an +additional, unnecessary vector. With the current, scalar `x`, this is not such a big deal. But in +your homework you will generalize this function to matrix inputs of `x`, which means that `powers` +becomes a vector of (potentially very large) matrices. This is a very natural use case for the +`mapreduce` function: + +```@example lab01_base +polynomial(a, x) = mapreduce(+, enumerate(a), init=zero(x)) do (i, aᵢ) + x^(i-1) * aᵢ +end + +polynomial(a, x) +``` + +Let's unpack what is happening here. If the function `mapreduce(f, op, itr)` is called with `op=+` +it returns the same result as `sum(map(f, itr))`. In contrast to `sum(map(f, itr))` (which +allocates a vector as a result of `map` and **then** sums) `mapreduce` applies `f` to an element in +`itr` and **immediately accumulates** the result with the given `op=+`. + +```@repl lab01_base +polynomial(a, x) = sum(ia -> x^(ia[1]-1) * ia[2], enumerate(a)) +nothing #hide +``` + +::: + +## How to use code from other people + +The script that we have run at the beginning of this lab has created two new files inside the current folder: + +``` +./ + ├── Manifest.toml + └── Project.toml +``` + +Every folder with a toml file called `Project.toml`, can be used by Julia's pkg manager into setting so called environment, which contains a list of pkgs to be installed. 
Setting up or more often called activating an environment can be done either before starting Julia itself by running julia with the `--project XXX` flag or from within the Julia REPL, by switching to Pkg mode with `]` key (similar to the help mode activated by pressing `?`) and running command `activate`. + +So far we have used the general environment (depending on your setup), which by default does not come with any 3rd party packages and includes only the base and standard libraries - [already](https://docs.julialang.org/en/v1/base/arrays/) [quite](https://docs.julialang.org/en/v1/base/multi-threading/) [powerful](https://docs.julialang.org/en/v1/stdlib/Distributed/) [on its own](https://docs.julialang.org/en/v1/stdlib/LinearAlgebra/). + +In order to find which environment is currently active, run the following: + +```julia +pkg> status +``` + +The output of such command usually indicates the general environment located at `.julia/` folder (`${HOME}/.julia/` or `${APPDATA}/.julia/` in case of Unix/Windows based systems respectively) + +```julia +pkg> status +Status `~/.julia/environments/v1.6/Project.toml` (empty project) +``` + +Generally one should avoid working in the general environment, with the exception of some generic pkgs, such as `PkgTemplates.jl`, which is used for generating library templates/folder structure like the one above ([link](https://github.com/invenia/PkgTemplates.jl)), more on this in the lecture on pkg development. + + +::: warning Exercise + +Activate the environment inside the current folder and check that the `BenchmarkTools` package has been installed. Use `BenchmarkTools` pkg's `@btime` to benchmark our `polynomial` function with the following arguments. + +```@example lab01_base +aexp = ones(10) ./ factorial.(0:9) +x = 1.1 +nothing #hide +``` + +**HINTS:** +- In pkg mode use the command `activate` and `status` to check the presence. 
More on the architecture of pkg/module loading in the package development lecture.
+ +```julia +julia> using BenchmarkTools + +julia> @btime polynomial(aexp, x) + 97.119 ns (1 allocation: 16 bytes) +3.004165230550543 +``` + +The output gives us the time of execution averaged over multiple runs (the number of samples is defined automatically based on run time) as well as the number of allocations and the output of the function, that is being benchmarked. + +**BONUS**: The difference between our approximation and the "actual" function value computed as a difference of the two. +```@repl lab01_base +polynomial(aexp, x) - exp(x) +``` + +The apostrophes in the previous sentence are on purpose, because implementation of `exp` also relies on a finite sum, though much more sophisticated than the basic Taylor expansion. + +::: + +## Discussion & future directions + +Instead of `if-else` statements that would throw an error for different types, in Julia, we generally see the pattern of typing the function in a way, that for other than desirable types `MethodError` is emitted with the information about closest matching methods. This is part of the design process in Julia of a function and for the particular functionality of the `polynomial` example, we can look into the Julia itself, where it has been implemented in the `evalpoly` function + +```@repl lab01_base +methods(evalpoly) +``` + +Another avenue, that we have only touched with the `BenchmarkTools`, is performance and will be further explored in the later lectures. + +With the next lecture focused on typing in Julia, it is worth noting that polynomials lend themselves quite nicely to a definition of a custom type, which can help both readability of the code as well further extensions. 
+ +```julia +struct Polynom{C} + coefficients::{C} +end + +function (p:Polynom)(x) + polynomial(p.coefficients, x) +end +``` + +--- + +## Useful resources + +- Getting Started tutorial from JuliaLang documentation - [Docs](https://docs.julialang.org/en/v1/manual/getting-started/) +- Converting syntax between MATLAB ↔ Python ↔ Julia - [Cheatsheet](https://cheatsheets.quantecon.org/) +- Bachelor course for refreshing your knowledge - [Course](https://juliateachingctu.github.io/Julia-for-Optimization-and-Learning/stable/) +- Stylistic conventions - [Style Guide](https://docs.julialang.org/en/v1/manual/style-guide/#Style-Guide) +- Reserved keywords - [List](https://docs.julialang.org/en/v1/base/base/#Keywords) +- Official cheatsheet with basic syntax - [link](https://juliadocs.github.io/Julia-Cheat-Sheet/) + + +### [Various errors and how to read them](@id lab_errors) + +This section summarizes most commonly encountered types of errors in Julia and how to resolve them or at least understand, what has gone wrong. It expands a little bit the official [documentation](https://docs.julialang.org/en/v1/base/base/#Errors), which contains the complete list with examples. Keep in mind again, that you can use help mode in the REPL to query error types as well. + +#### `MethodError` + +This type of error is most commonly thrown by Julia's multiple dispatch system with a message like `no method matching X(args...)`, seen in two examples bellow. + +```@repl lab01_exceptions +2 * 'a' # many candidates +getindex((i for i in 1:4), 3) # no candidates +``` + +Both of these examples have a short stacktrace, showing that the execution failed on the top most level in `REPL`, however if this code is a part of some function in a separate file, the stacktrace will reflect it. What this error tells us is that the dispatch system could not find a method for a given function, that would be suitable for the type of arguments, that it has been given. 
In the first case Julia also offers a list of candidate methods that match at least some of the arguments.
+ +```@repl lab01_exceptions +hcat(ones(3,3), zeros(2,2)) +``` + +#### `KeyError` + +This error is specific to hash table based objects such as the `Dict` type and tells the user that and indexing operation into such structure tried to access or delete a non-existent element. + +```@repl lab01_exceptions +d = Dict(:a => [1,2,3], :b => [1,23]) +d[:c] +``` + +#### `TypeError` + +Type assertion failure, or calling an intrinsic function (inside LLVM, where code is strictly typed) with incorrect argument type. In practice this error comes up most often when comparing value of a type against the `Bool` type as seen in the example bellow. + +```@repl lab01_exceptions +if 1 end # calls internally typeassert(1, Bool) +typeassert(1, Bool) +``` + +In order to compare inside conditional statements such as `if-elseif-else` or the ternary operator `x ? a : b` the condition has to be always of `Bool` type, thus the example above can be fixed by the comparison operator: `if 1 == 1 end` (in reality either the left or the right side of the expression contains an expression or a variable to compare against). + +#### `UndefVarError` + +While this error is quite self-explanatory, the exact causes are often quite puzzling for the user. The reason behind the confusion is to do with *code scoping*, which comes into play for example when trying to access a local variable from outside of a given function or just updating a global variable from within a simple loop. + +In the first example we show the former case, where variable is declared from within a function and accessed from outside afterwards. + +```@repl lab01_exceptions +function plusone(x) + uno = 1 + return x + uno +end +uno # defined only within plusone +``` + +Unless there is variable `I_am_not_defined` in the global scope, the following should throw an error. 
+ +```@repl lab01_exceptions +I_am_not_defined +``` + +Often these kind of errors arise as a result of bad code practices, such as long running sessions of Julia having long forgotten global variables, that do not exist upon new execution (this one in particular has been addressed by the authors of the reactive Julia notebooks [Pluto.jl](https://github.com/fonsp/Pluto.jl)). + +For more details on code scoping we recommend particular places in the bachelor course lectures [here](https://juliateachingctu.github.io/Julia-for-Optimization-and-Learning/stable/lecture_02/scope/#Soft-local-scope) and [there](https://juliateachingctu.github.io/Julia-for-Optimization-and-Learning/stable/lecture_03/scope/#Scope-of-variables). + +#### `ErrorException` & `error` function + +`ErrorException` is the most generic error, which can be thrown/raised just by calling the `error` function with a chosen string message. As a result developers may be inclined to misuse this for any kind of unexpected behavior a user can run into, often providing out-of-context/uninformative messages. diff --git a/docs_vitepress/src/lectures/lecture_01/motivation.md b/docs_vitepress/src/lectures/lecture_01/motivation.md new file mode 100644 index 00000000..483748e6 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_01/motivation.md @@ -0,0 +1,285 @@ +# Introduction to Scientific Programming + +::: tip Loose definition of Scientific Programming + +[Scientific programming languages](https://en.wikipedia.org/wiki/Scientific_programming_language) +are designed and optimized for implementing mathematical formulas and for computing with matrices. + +::: + +Examples of Scientific programming languages include ALGOL, APL, Fortran, J, Julia, Maple, MATLAB and R. + +Key requirements for a Scientific programming language: + +1. _**Fast**_ execution of the code (complex algorithms). +2. _**Ease**_ of code reuse / code restructuring. 
+ +![](../../assets/julia-set.png) +```html +# TODO: caption for images not working +
+ +
+ Julia set. + Stolen from + Colorschemes.jl. +
+
+``` + +In contrast to general-purpose languages, Julia has: + +- less concern with standalone executable/library compilation +- less concern with Application binary interface (ABI) +- less concern with business models (library + header files) +- less concern with public/private separation + +::: tip Example of a scientific task + +In many applications, we encounter the task of optimizing a function given by a routine (e.g. engineering, finance, etc.) + +```julia +using Optim + +P(x,y) = x^2 - 3x*y + 5y^2 - 7y + 3 # user defined function + +z₀ = [0.0, 0.0] # starting point + +optimize(z -> P(z...), z₀, ConjugateGradient()) +optimize(z -> P(z...), z₀, Newton()) +optimize(z -> P(z...), z₀, Newton(); autodiff = :forward) +``` + +::: + +Very simple for a user, very complicated for a programmer. The program should: + - pick the right optimization method (easy by config-like approach) + - compute gradient (Hessian) of a *user* function + +## Classical approach: create a fast library and flexible calling environment + +Crucial algorithms (sort, least squares...) are relatively small and well defined. Application of these algorithms to a real-world problem is typically not well defined and requires more code. Iterative development. + +Think of a problem of repeated execution of similar jobs with different options. Different levels: + +- binary executable with command-line switches +- binary executable with configuration file +- scripting language/environment (Read-Eval-Print Loop) + +It is not a strict boundary, increasing expressivity of the configuration file will create a new scripting language. + +Ending up in the *2 language problem*. + +1. Low-level programming = computer centric + - close to the hardware + - allows excellent optimization for fast execution + +1. 
High-level programming = user centric + - running code with many different modifications as easily as possible + - allowing high level of abstraction + +In scientific programming, the most well known scripting languages are: Python, Matlab, R + +- If you care about standard "configurations" they are just perfect. (PyTorch, BLAS) +- You hit a problem with more complex experiments, such a modifying the internal algorithms. + +The scripting language typically makes decisions (```if```) at runtime. Becomes slow. + +### Examples + +1. Basic Linear Algebra Subroutines (BLAS)--MKL, OpenBlas---with bindings (Matlab, NumPy) +1. Matlab and Mex (C with pointer arithmetics) +1. Python with transcription to C (Cython) + + +### Convergence efforts + +1. Just-in-time compilation (understands high level and converts to low-level) +1. automatic typing (auto in C++) (extends low-level with high-level concepts) + +# Julia approach: fresh thinking + +![](julia-scope.svg) + +A dance between specialization and abstraction. + +- **Specialization** allows for custom treatment. The right algorithm for the right circumstance is obtained by *Multiple dispatch*, +- **Abstraction** recognizes what remains the same after differences are stripped away. Abstractions in mathematics are captured as code through *generic programming*. + +Why a new language? + +## Challenge + +Translate high-level thinking with as much abstraction as possible into specific *fast* machine code. + +Not so easy! + +::: danger Indexing array x in Matlab + +```matlab +x = [1,2,3] +y=x(4/2) +y=x(5/2) +``` + +In the first case it works, in the second throws an error. + +- type instability +- function ```inde(x,n,m)=x(n/m)``` can never be fast. +- Poor language design choice! + +::: + +Simple solution + +- Solved by different floating and integer division operation ```/,÷``` +- Not so simple with complex objects, e.g. 
triangular matrices + + +Julia was designed as a high-level language that allows very high level abstract concepts but *propagates* as much information about the specifics as possible to help the compiler to generate as fast code as possible. Taking lessons from the inability to achieve fast code compilation (mostly from python). + +![](benchmarks.svg) + +- julia is faster than C? + +## Julia way + +Design principle: abstraction should have *zero* runtime cost + +- flexible type system with strong typing (abstract types) +- multiple dispatch +- single language from high to low levels (as much as possible) + optimize execution as much as you can during *compile time* + - functions as symbolic abstraction layers + +![](julia-compilation.svg) + +- AST = Abstract Syntax Tree +- IR = Intermediate Representation + +## Teaser example + +Function recursion with arbitrary number of arguments: + +```julia +fsum(x) = x +fsum(x,p...) = x+fsum(p...) +``` + +Defines essentially a sum of inputs. Nice generic and abstract concept. + +Possible in many languages: + +- Matlab via ```nargin, varargin``` using construction + ```if nargin==1, out=varargin{1}, else out=fsum(varargin{2:end}), end``` + +Julia solves this ```if``` at compile time. + +The generated code can be inspected by macro ```@code_llvm```? + +```julia +fsum(1,2,3) +@code_llvm fsum(1,2,3) +@code_llvm fsum(1.0,2.0,3.0) +fz()=fsum(1,2,3) +@code_llvm fz() +``` + +Note that each call of fsum generates a new and different function. + +Functions can act either as regular functions or like templates in C++. Compiler decides. + +This example is relatively simple, many other JIT languages can optimize such code. Julia allows taking this approach further. + + +Generality of the code: + +```julia +fsum('c',1) +fsum([1,2],[3,4],[5,6]) +``` + +Relies on *multiple dispatch* of the ```+``` function. 
+ +More involved example: + +```julia +using Zygote + +f(x)=3x+1 # user defined function +@code_llvm f'(10) +``` + +The simplification was not achieved by the compiler alone. + +- Julia provides tools for AST and IR code manipulation +- automatic differentiation via IR manipulation is implemented in Zygote.jl +- in a similar way, debugger is implemented in Debugger.jl +- very simple to design *domain specific* language + +```julia +using Turing +using StatsPlots + +@model function gdemo(x, y) + s² ~ InverseGamma(2, 3) + m ~ Normal(0, sqrt(s²)) + x ~ Normal(m, sqrt(s²)) + y ~ Normal(m, sqrt(s²)) +end +``` + + +Such tools allow building a very convenient user experience on abstract level, and reaching very efficient code. + +## Reproducible research + +Think about a code that was written some time ago. To run it, you often need to be able to have the same version of the language it was written for. + +- **Standard way** language freezes syntax and guarantees some back-ward compatibility (Matlab), which prevents future improvements + +- **Julia approach** allows easy recreation of the *environment* in which the code was developed. Every project (e.g. directory) can have its own environment + +::: tip Environment + +Is an independent set of packages that can be local to an individual project or shared and selected by name. + +::: + +::: tip Package + +A package is a source tree with a standard layout providing functionality that can be reused by other Julia projects. 
+ +::: + +This allows Julia to be a rapidly evolving ecosystem with frequent changes due to: + +- built-in package manager +- switching between multiple versions of packages + + +### Package manager + +- implemented by Pkg.jl +- source tree have their structure defined by a convention +- have its own mode in REPL +- allows adding packages for using (```add```) or development (```dev```) +- supporting functions for creation (```generate```) and activation (```activate```) and many others + + +## Julia from user's point of view + + 1. **compilation** of everything to as specialized as possible + + ✅ very fast code + - ❌ slow interaction (caching...) + - ❌ generating libraries is harder + - think of ```fsum```, + - everything is ".h" (Eigen library) + - ❌ debugging is different to matlab/python + + 2. **extensibility**, Multiple dispatch = multi-functions + + ✅ allows great extensibility and code composition + - ❌ not (yet) mainstream thinking + - ❌ Julia is not Object-oriented + - ❌ Julia is (not pure) functional language + diff --git a/docs_vitepress/src/lectures/lecture_01/polynomial.svg b/docs_vitepress/src/lectures/lecture_01/polynomial.svg new file mode 100644 index 00000000..4a78757b --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_01/polynomial.svg @@ -0,0 +1,775 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + inline comment + creation/assignment of integer variable + function arguments untyped + function name + function body keywords + function return value + update operator + exponentiation operator + array/iterator indexing + for loop specification with range + + diff --git a/docs_vitepress/src/lectures/lecture_01/processor.gif 
b/docs_vitepress/src/lectures/lecture_01/processor.gif new file mode 100644 index 00000000..51995c23 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_01/processor.gif differ diff --git a/docs_vitepress/src/lectures/lecture_01/test_setup.jl b/docs_vitepress/src/lectures/lecture_01/test_setup.jl new file mode 100644 index 00000000..5ffd711e --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_01/test_setup.jl @@ -0,0 +1,36 @@ +using InteractiveUtils: versioninfo +versioninfo() + +println("-------------------------------------------------------------------------") +println("Julia started from terminal without args: ", length(ARGS) == 0 ? "✔" : "✗") +println("Running from the same folder as this script: ", isfile("./test_setup.jl") ? "✔" : "✗") +println("Running Julia 1.6.0 or above: ", VERSION >= v"1.6.0" ? "✔" : "✗") + +name = try + readchomp(`git config user.name`) +catch + "" +end + +mail = try + readchomp(`git config user.email`) +catch + "" +end + + +println("Git Config Username: ", length(name) > 0 ? "✔" : "✗") +println("Git Config Email: ", length(mail) > 0 ? "✔" : "✗") + +s = try + using Pkg + Pkg.activate(".") + Pkg.add("BenchmarkTools") + true +catch e + @warn e + false +end + +println("Installed test environment for later use: ", s ? 
"✔" : "?") + diff --git a/docs_vitepress/src/lectures/lecture_02/Lab02Ecosystem.jl b/docs_vitepress/src/lectures/lecture_02/Lab02Ecosystem.jl new file mode 100644 index 00000000..24e8ea7e --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_02/Lab02Ecosystem.jl @@ -0,0 +1,97 @@ +abstract type Agent end +abstract type Animal <: Agent end +abstract type Plant <: Agent end + + +mutable struct Grass <: Plant + const id::Int + size::Int + const max_size::Int +end + +Grass(id,m=10) = Grass(id, rand(1:m), m) + +function Base.show(io::IO, g::Grass) + x = g.size/g.max_size * 100 + # hint: to type the leaf in the julia REPL you can do: + # \:herb: + print(io,"🌿 #$(g.id) $(round(Int,x))% grown") +end + + +mutable struct Sheep <: Animal + const id::Int + energy::Float64 + const Δenergy::Float64 + const reprprob::Float64 + const foodprob::Float64 +end + +Sheep(id, e=4.0, Δe=0.2, pr=0.8, pf=0.6) = Sheep(id,e,Δe,pr,pf) + +function Base.show(io::IO, s::Sheep) + e = s.energy + d = s.Δenergy + pr = s.reprprob + pf = s.foodprob + print(io,"🐑 #$(s.id) E=$e ΔE=$d pr=$pr pf=$pf") +end + + +mutable struct Wolf <: Animal + const id::Int + energy::Float64 + const Δenergy::Float64 + const reprprob::Float64 + const foodprob::Float64 +end + +Wolf(id, e=10.0, Δe=8.0, pr=0.1, pf=0.2) = Wolf(id,e,Δe,pr,pf) + +function Base.show(io::IO, w::Wolf) + e = w.energy + d = w.Δenergy + pr = w.reprprob + pf = w.foodprob + print(io,"🐺 #$(w.id) E=$e ΔE=$d pr=$pr pf=$pf") +end + + +mutable struct World{A<:Agent} + agents::Dict{Int,A} + max_id::Int +end + +function World(agents::Vector{<:Agent}) + max_id = maximum(a.id for a in agents) + World(Dict(a.id=>a for a in agents), max_id) +end + +# optional: overload Base.show +function Base.show(io::IO, w::World) + println(io, typeof(w)) + for (_,a) in w.agents + println(io," $a") + end +end + + +function eat!(sheep::Sheep, grass::Grass, w::World) + sheep.energy += grass.size * sheep.Δenergy + grass.size = 0 +end +function eat!(wolf::Wolf, sheep::Sheep, 
w::World) + wolf.energy += sheep.energy * wolf.Δenergy + kill_agent!(sheep,w) +end + +kill_agent!(a::Agent, w::World) = delete!(w.agents, a.id) + +function reproduce!(a::A, w::World) where A<:Animal + a.energy = a.energy/2 + a_vals = [getproperty(a,n) for n in fieldnames(A) if n!=:id] + new_id = w.max_id + 1 + â = A(new_id, a_vals...) + w.agents[â.id] = â + w.max_id = new_id +end diff --git a/docs_vitepress/src/lectures/lecture_02/hw.md b/docs_vitepress/src/lectures/lecture_02/hw.md new file mode 100644 index 00000000..9060b156 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_02/hw.md @@ -0,0 +1,79 @@ +# Homework 2: Predator-Prey Agents + +In this lab you will continue working on your agent simulation. If you did not +manage to finish the homework, do not worry, you can use [this +script](https://github.com/JuliaTeachingCTU/Scientific-Programming-in-Julia/blob/2023W/docs/src/lecture_02/Lab02Ecosystem.jl) +which contains all the functionality we developed in the lab. + +```@setup hw02 +include("Lab02Ecosystem.jl") +``` + +## How to submit? + +Put all your code (including your or the provided solution of lab 2) in a script +named `hw.jl`. Zip only this file (not its parent folder) and upload it to +BRUTE. Your file cannot contain any package dependencies. For example, having a +`using Plots` in your code will cause the automatic evaluation to fail. + +## Counting Agents + +To monitor the different populations in our world we need a function that +counts each type of agent. For `Animal`s we simply have to count how +many of each type are currently in our `World`. In the case of `Plant`s +we will use the fraction of `size(plant)/max_size(plant)` as a measurement +quantity. + +::: danger Compulsory Homework (2 points) + +1. Implement a function `agent_count` that can be called on a single + `Agent` and returns a number between $(0,1)$ (i.e. always `1` for animals; + and `size(plant)/max_size(plant)` for plants). + +2. 
Add a method for a vector of agents `Vector{<:Agent}` which sums all + agent counts. + +3. Add a method for a `World` which returns a dictionary + that contains pairs of `Symbol`s and the agent count like below: + +```@setup hw02 +agent_count(p::Plant) = p.size / p.max_size +agent_count(::Animal) = 1 +agent_count(as::Vector{<:Agent}) = sum(agent_count,as) + +function agent_count(w::World) + function op(d::Dict,a::A) where A<:Agent + n = nameof(A) + if n in keys(d) + d[n] += agent_count(a) + else + d[n] = agent_count(a) + end + return d + end + foldl(op, w.agents |> values |> collect, init=Dict{Symbol,Real}()) +end +``` + +```@repl hw02 +grass1 = Grass(1,5,5); +agent_count(grass1) + +grass2 = Grass(2,1,5); +agent_count([grass1,grass2]) # one grass is fully grown; the other only 20% => 1.2 + +sheep = Sheep(3,10.0,5.0,1.0,1.0); +wolf = Wolf(4,20.0,10.0,1.0,1.0); +world = World([grass1, grass2, sheep, wolf]); +agent_count(world) +``` + +**Hint:** You can get the *name* of a type by using the `nameof` function: + +```@repl hw02 +nameof(Grass) +``` + +Use as much dispatch as you can! ;) + +::: \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_02/lab.md b/docs_vitepress/src/lectures/lecture_02/lab.md new file mode 100644 index 00000000..cf9be066 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_02/lab.md @@ -0,0 +1,346 @@ +# [Lab 2: Predator-Prey Agents](@id lab02) + +In the next labs you will implement your own *predator-prey model*. The model +will contain wolves, sheep, and - to feed your sheep - some grass. The final +simulation will be turn-based and the agents will be able to eat each other, +reproduce, and die in every iteration. +At every iteration of the simulation each agent will step forward in time +via the `agent_step!` function. The steps for the `agent_step!` methods of +animals and plants are written below in pseudocode. 
+ +```pseudo +# for animals: +agent_step!(animal, world) + decrement energy by 1 + find & eat food (with probability pf) + die if no more energy + reproduce (with probability pr) + +# for plants: +agent_step!(plant, world) + grow if not at maximum size +``` + +The `world` in which the agents live will be the simplest possible world with +zero dimensions (i.e. a `Dict` of `ID=>Agent`). Running and plotting your final +result could look something like the plot below. + +![img](pred-prey.png) + +We will start implementing the basic functionality for each `Agent` like +`eat!`ing, `reproduce!`ing, and a very simplistic `World` for your agents to live in. +In the next lab you will refine both the type hierarchy of your `Agent`s, as well +as the design of the `World` in order to leverage the power of Julia's type system +and compiler. + +We start with a very basic type hierarchy: + +```julia +abstract type Agent end +abstract type Animal <: Agent end +abstract type Plant <: Agent end +``` + +We will implement the `World` for our `Agent`s later, but it will essentially be +implemented by a `Dict` which maps unique IDs to an `Agent`. Hence, every agent +will need an ID. + +## The `Grass` Agent + +Let's start by implementing some `Grass` which will later be able to grow +during each iteration of our simulation. + +::: warning Exercise + +1. Define a mutable `struct` called `Grass` which is a subtype of `Plant` has the fields + `id` (the unique identifier of this `Agent` - every agent needs one!), + `size` (the current size of the `Grass`), and `max_size`. All fields should be integers. +2. Define a constructor for `Grass` which, given only an ID and a maximum size + $m$, will create an instance of `Grass` that has a randomly initialized size in + the range $[1,m]$. It should also be possible to create `Grass`, just with an ID + and a default `max_size` of `10`. +3. 
Implement `Base.show(io::IO, g::Grass)` to get custom printing of your `Grass` such that + the `Grass` is displayed with its size in percent of its `max_size`. + +*Hint:* You can implement a custom `show` method for a new type `MyType` like this: + +```julia +struct MyType + x::Bool +end +Base.show(io::IO, a::MyType) = print(io, "MyType $(a.x)") +``` + +::: + +::: details Show solution + +Since Julia 1.8 we can also declare some fields of `mutable` structs as `const`, +which can be used both to prevent us from mutating immutable fields (such as the ID) +but can also be used by the compiler in certain cases. + +```julia +mutable struct Grass <: Plant + const id::Int + size::Int + const max_size::Int +end + +Grass(id,m=10) = Grass(id, rand(1:m), m) + +function Base.show(io::IO, g::Grass) + x = g.size/g.max_size * 100 + # hint: to type the leaf in the julia REPL you can do: + # \:herb: + print(io,"🌿 #$(g.id) $(round(Int,x))% grown") +end +``` + +::: + +Creating a few `Grass` agents can then look like this: +```julia +Grass(1,5) +g = Grass(2) +g.id = 5 +``` + + +## `Sheep` and `Wolf` Agents + +Animals are slightly different from plants. They will have an energy $E$, which +will increase (or decrease) if the agent eats (or reproduces) by a certain +amount $\Delta E$. Later we will also need a probability to find food $p_f$ and +a probability to reproduce $p_r$. + +::: warning Exercise + +1. Define two mutable structs `Sheep` and `Wolf` that are subtypes of `Animal` and have the fields + `id`, `energy`, `Δenergy`, `reprprob`, and `foodprob`. +2. Define constructors with the following default values: + - For 🐑: $E=4$, $\Delta E=0.2$, $p_r=0.8$, and $p_f=0.6$. + - For 🐺: $E=10$, $\Delta E=8$, $p_r=0.1$, and $p_f=0.2$. +3. Overload `Base.show` to get pretty printing for your two new animals. 
+ +::: + +::: details Show solution for `Sheep` + +```julia +mutable struct Sheep <: Animal + const id::Int + energy::Float64 + const Δenergy::Float64 + const reprprob::Float64 + const foodprob::Float64 +end + +Sheep(id, e=4.0, Δe=0.2, pr=0.8, pf=0.6) = Sheep(id,e,Δe,pr,pf) + +function Base.show(io::IO, s::Sheep) + e = s.energy + d = s.Δenergy + pr = s.reprprob + pf = s.foodprob + print(io,"🐑 #$(s.id) E=$e ΔE=$d pr=$pr pf=$pf") +end +``` + +::: + +::: details Show solution for `Wolf` + +```julia +mutable struct Wolf <: Animal + const id::Int + energy::Float64 + const Δenergy::Float64 + const reprprob::Float64 + const foodprob::Float64 +end + +Wolf(id, e=10.0, Δe=8.0, pr=0.1, pf=0.2) = Wolf(id,e,Δe,pr,pf) + +function Base.show(io::IO, w::Wolf) + e = w.energy + d = w.Δenergy + pr = w.reprprob + pf = w.foodprob + print(io,"🐺 #$(w.id) E=$e ΔE=$d pr=$pr pf=$pf") +end +``` + +::: + +```julia +Sheep(4) +Wolf(5) +``` + + +## The `World` + +Before our agents can eat or reproduce we need to build them a `World`. +The simplest (and as you will later see, somewhat suboptimal) world is essentially +a `Dict` from IDs to agents. Later we will also need the maximum ID, lets define +a world with two fields: +```julia +mutable struct World{A<:Agent} + agents::Dict{Int,A} + max_id::Int +end +``` + +!!! warning "Exercise" + Implement a constructor for the `World` which accepts a vector of `Agent`s. + +::: details Show solution + +```julia +function World(agents::Vector{<:Agent}) + max_id = maximum(a.id for a in agents) + World(Dict(a.id=>a for a in agents), max_id) +end + +# optional: overload Base.show +function Base.show(io::IO, w::World) + println(io, typeof(w)) + for a in values(w.agents) + println(io," ",a) + end +end +``` + +::: + +## `Sheep` eats `Grass` + +We can implement the behaviour of our various agents with respect to each other +by leveraging Julia's multiple dispatch. + +!!! 
warning "Exercise" + Implement a function `eat!(::Sheep, ::Grass, ::World)` which increases the sheep's + energy by $\Delta E$ multiplied by the size of the grass. + + After the sheep's energy is updated the grass is eaten and its size counter has + to be set to zero. + + Note that you do not yet need the world in this function. It is needed later + for the case of wolves eating sheep. + +::: details Show solution + +```julia +function eat!(sheep::Sheep, grass::Grass, w::World) + sheep.energy += grass.size * sheep.Δenergy + grass.size = 0 +end +``` + +::: + +Below you can see how a fully grown grass is eaten by a sheep. The sheep's +energy changes `size` of the grass is set to zero. + +```julia +grass = Grass(1) +sheep = Sheep(2) +world = World([grass, sheep]) +eat!(sheep,grass,world); +world +``` + +Note that the order of the arguments has a meaning here. Calling +`eat!(grass,sheep,world)` results in a `MethodError` which is great, because +`Grass` cannot eat `Sheep`. + +```julia +eat!(grass,sheep,world); +``` + + +## `Wolf` eats `Sheep` + +!!! warning "Exercise" + The `eat!` method for wolves increases the wolf's energy by `sheep.energy * + wolf.Δenergy` and kills the sheep (i.e. removes the sheep from the world). + There are other situations in which agents die , so it makes sense to implement + another function `kill_agent!(::Animal,::World)`. + + Hint: You can use `delete!` to remove agents from the dictionary in your world. + +::: details Show solution + +```julia +function eat!(wolf::Wolf, sheep::Sheep, w::World) + wolf.energy += sheep.energy * wolf.Δenergy + kill_agent!(sheep,w) +end + +kill_agent!(a::Agent, w::World) = delete!(w.agents, a.id) +``` + +::: + + +With a correct `eat!` method you should get results like this: + +```julia +grass = Grass(1); +sheep = Sheep(2); +wolf = Wolf(3); +world = World([grass, sheep, wolf]) +eat!(wolf,sheep,world); +world +``` + +The sheep is removed from the world and the wolf's energy increased by $\Delta E$. 
+ + +## Reproduction + +Currently our animals can only eat. In our simulation we also want them to +reproduce. We will do this by adding a `reproduce!` method to `Animal`. + +!!! warning "Exercise" + Write a function `reproduce!` that takes an `Animal` and a `World`. + Reproducing will cost an animal half of its energy and then add an almost + identical copy of the given animal to the world. The only thing that is + different from parent to child is the ID. You can simply increase the `max_id` + of the world by one and use that as the new ID for the child. + +::: details Show solution + +```julia +function reproduce!(a::Animal, w::World) + a.energy = a.energy/2 + new_id = w.max_id + 1 + â = deepcopy(a) + â.id = new_id + w.agents[â.id] = â + w.max_id = new_id +end +``` + +You can avoid mutating the `id` field (which could be considered bad practice) +by reconstructing the child from scratch: + +```julia +function reproduce!(a::A, w::World) where A<:Animal + a.energy = a.energy/2 + a_vals = [getproperty(a,n) for n in fieldnames(A) if n!=:id] + new_id = w.max_id + 1 + â = A(new_id, a_vals...) + w.agents[â.id] = â + w.max_id = new_id +end +``` + +::: + +```julia +s1, s2 = Sheep(1), Sheep(2) +w = World([s1, s2]) +reproduce!(s1, w); +w +``` diff --git a/docs_vitepress/src/lectures/lecture_02/lecture.md b/docs_vitepress/src/lectures/lecture_02/lecture.md new file mode 100644 index 00000000..43259967 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_02/lecture.md @@ -0,0 +1,739 @@ +# [Motivation](@id type_lecture) + +Before going into the details of Julia's type system, we will spend a few minutes motivating the roles of a type system, which are: + +1. Structuring code +2. Communicating to the compiler how a type will be used + +The first aspect is important for the convenience of the programmer and enables abstractions +in the language, the latter aspect is important for the speed of the generated code. 
*Writing efficient Julia code is best viewed as a dialogue between the programmer and the compiler.* [^1] + +Type systems according to [Wikipedia](https://en.wikipedia.org/wiki/Data_type): + +* In computer science and computer programming, a **data type** or simply **type** is an attribute of data which tells the compiler or interpreter how the programmer intends to use the data. +* A **type system** is a logical system comprising a set of rules that assigns a property called a type to the various constructs of a computer program, such as variables, expressions, functions or modules. These types formalize and enforce the otherwise implicit categories the programmer uses for algebraic data types, data structures, or other components. [Good short answer to Static vs. Dynamic types](https://stackoverflow.com/a/34004445) + +## Structuring the code / enforcing the categories + +The role of **structuring** the code and imposing semantic restriction +means that the type system allows you to logically divide your program, +and to prevent certain types of errors. +Consider for example two types, `Wolf` and `Sheep` which share the same +definition but the types have different names. + +```julia +struct Wolf + name::String + energy::Int +end + +struct Sheep + name::String + energy::Int +end +``` + +This allows us to define functions applicable only to the corresponding type + +```julia +howl(wolf::Wolf) = println(wolf.name, " has howled.") +baa(sheep::Sheep) = println(sheep.name, " has baaed.") +``` + +Therefore the compiler (or interpreter) **enforces** that a wolf can only `howl` +and never `baa` and vice versa a sheep can only `baa`. In this sense, it ensures +that `howl(sheep)` and `baa(wolf)` never happen. + +```julia +baa(Sheep("Karl",3)) +baa(Wolf("Karl",3)) +``` + +Notice the type of error of the latter call `baa(Wolf("Karl",3))`. 
Julia raises `MethodError` which states that it has failed to find a function `baa` for the type `Wolf` (but there is a function `baa` for type `Sheep`). + +For comparison, consider an alternative definition which does not have specified types + +```julia +bark(animal) = println(animal.name, " has howled.") +baa(animal) = println(animal.name, " has baaed.") +``` + +in which case the burden of ensuring that a wolf will never baa rests upon the +programmer which inevitably leads to errors (note that severely constrained +type systems are difficult to use). + +## Intention of use and restrictions on compilers + +Types play an important role in generating efficient code by a compiler, because they tell the compiler which operations are permitted, prohibited, and can indicate invariants of type (e.g. constant size of an array). If the compiler knows that something is invariant (constant), it can exploit such information. As an example, consider the following two alternatives to represent a set of animals: + +```julia +a = [Wolf("1", 1), Wolf("2", 2), Sheep("3", 3)] +b = (Wolf("1", 1), Wolf("2", 2), Sheep("3", 3)) +``` + +where `a` is an array which can contain arbitrary types and have arbitrary length +whereas `b` is a `Tuple` which has fixed length in which the first two items are of type `Wolf` +and the third item is of type `Sheep`. Moreover, consider a function which calculates the +energy of all animals as + +```julia +energy(animals) = mapreduce(x -> x.energy, +, animals) +``` + +A good compiler makes use of the information provided by the type system to generate efficient code which we can verify by inspecting the compiled code using `@code_native` macro + +```julia +@code_native debuginfo=:none energy(a) +@code_native debuginfo=:none energy(b) +``` + +one observes that the second version produces more optimal code. Why is that? 
+ +* In the first representation, `a`, the animals are stored in an `Array{Any}` which can have arbitrary size and can contain arbitrary animals. This means that the compiler has to compile `energy(a)` such that it works on such arrays. +* In the second representation, `b`, the animals are stored in a `Tuple`, which specializes for lengths and types of items. This means that the compiler knows the number of animals and the type of each animal on each position within the tuple, which allows it to specialize. + +This difference will indeed have an impact on the time of code execution. +On my i5-8279U CPU, the difference (as measured by BenchmarkTools) is + +```julia +julia> using BenchmarkTools + +julia> @btime energy($(a)) + 70.2 ns (0 allocations: 0 bytes) + +julia> @btime energy($(b)) + 2.62 ns (0 allocations: 0 bytes) + +``` + +Which nicely demonstrates that the choice of types affects performance. Does it mean that we should always use `Tuples` instead of `Arrays`? Surely not, it is just that each is better for different use-cases. Using Tuples means that the compiler will compile a special function for each length of tuple and each combination of types of items it contains, which is clearly wasteful. + +# [Julia's type system](@id type_system) + +## Julia is dynamically typed + +Julia's type system is dynamic, which means that all types are resolved during runtime. **But**, if the compiler can infer types of all variables of the called function, it can specialize the function for that given type of variables which leads to efficient code. Consider a modified example where we represent two wolfpacks: + +```julia +wolfpack_a = [Wolf("1", 1), Wolf("2", 2), Wolf("3", 3)] +wolfpack_b = Any[Wolf("1", 1), Wolf("2", 2), Wolf("3", 3)] +``` + +`wolfpack_a` carries a type `Vector{Wolf}` while `wolfpack_b` has the type `Vector{Any}`. 
This means that in the first case, the compiler knows that all items are of the type `Wolf` and it can specialize functions using this information. In case of `wolfpack_b`, it does not know which animal it will encounter (although all are of the same type), and therefore it needs to dynamically resolve the type of each item upon its use. This ultimately leads to less performant code.
+
+```julia
+@benchmark energy($(wolfpack_a))
+@benchmark energy($(wolfpack_b))
+```
+
+```julia
+ 3.7 ns (0 allocations: 0 bytes)
+ 69.4 ns (0 allocations: 0 bytes)
+```
+
+To conclude, julia is indeed a dynamically typed language, **but** if the compiler can infer all types in a called function in advance, it does not have to perform the type resolution during execution, which produces performant code. This means that in hot (performance critical) parts of the code, you should be type stable; in other parts, it is not such a big deal.
+
+## Classes of types
+
+Julia divides types into three classes: primitive, composite, and abstract.
+
+### Primitive types
+
+Citing the [documentation](https://docs.julialang.org/en/v1/manual/types/#Primitive-Types): *A primitive type is a concrete type whose data consists of plain old bits. Classic examples of primitive types are integers and floating-point values. Unlike most languages, Julia lets you declare your own primitive types, rather than providing only a fixed set of built-in ones. In fact, the standard primitive types are all defined in the language itself.*
+
+The definition of primitive types looks as follows
+
+```julia
+primitive type Float16 <: AbstractFloat 16 end
+primitive type Float32 <: AbstractFloat 32 end
+primitive type Float64 <: AbstractFloat 64 end
+```
+
+and they are mainly used to jump-start julia's type system. It is rarely needed to
+define a special primitive type, as it makes sense only if you define special functions
+operating on its bits. 
This is almost exclusively used for exposing special operations +provided by the underlying CPU / LLVM compiler. For example `+` for `Int32` is different +from `+` for `Float32` as they call a different intrinsic operations. You can inspect this +jump-starting of the type system yourself by looking at Julia's source. + +```julia +julia> @which +(1,2) ++(x::T, y::T) where T<:Union{Int128, Int16, Int32, Int64, Int8, UInt128, UInt16, UInt32, UInt64, UInt8} in Base at int.jl:87 +``` + +At `int.jl:87` + +```julia +(+)(x::T, y::T) where {T<:BitInteger} = add_int(x, y) +``` + +we see that `+` of integers is calling the function `add_int(x, y)`, which is defined in the core +part of the compiler in `Intrinsics.cpp` (yes, in C++), exposed in `Core.Intrinsics` + +From Julia docs: *Core is the module that contains all identifiers considered "built in" to +the language, i.e. part of the core language and not libraries. Every module implicitly +specifies using Core, since you can't do anything without those definitions.* + +Primitive types are rarely used, and they will not be used in this course. We mention them for the sake of completeness and refer the reader to the official Documentation (and source code of Julia). + +An example of use of primitive type is a definition of one-hot vector in the library `PrimitiveOneHot` as + +```julia +primitive type OneHot{K} <: AbstractOneHotArray{1} 32 end +``` + +where `K` is the dimension of the one-hot vector. + +### Abstract types + +An abstract type can be viewed as a set of concrete types. For example, an +`AbstractFloat` represents the set of concrete types `(BigFloat,Float64,Float32,Float16)`. +This is used mainly to define general methods for sets of types for which we expect the same behavior (recall the Julia design motivation: *if it quacks like a duck, waddles like a duck and looks like a duck, chances are it's a duck*). Abstract types are defined with `abstract type TypeName end`. 
For example the following set of abstract types defines part of julia's number system.
+
+```julia
+abstract type Number end
+abstract type Real <: Number end
+abstract type Complex <: Number end
+abstract type AbstractFloat <: Real end
+abstract type Integer <: Real end
+abstract type Signed <: Integer end
+abstract type Unsigned <: Integer end
+```
+
+where `<:` means "is a subtype of" and it is used in declarations where the right-hand side is an immediate supertype of a given type (`Integer` has the immediate supertype `Real`.) If the supertype is not supplied, it is considered to be Any, therefore in the above definition `Number` has the supertype `Any`.
+
+We can list the children of an abstract type using the function `subtypes`
+
+```julia
+using InteractiveUtils: subtypes # hide
+
+subtypes(AbstractFloat)
+```
+
+and we can also list the immediate `supertype` or climb the ladder all the way to `Any` using `supertypes`
+
+```julia
+using InteractiveUtils: supertypes # hide
+
+supertypes(AbstractFloat)
+```
+
+`supertype` and `subtypes` print only types defined in Modules that are currently loaded to your workspace. For example with Julia without any Modules, `subtypes(Number)` returns `[Complex, Real]`, whereas if I load `Mods` package implementing numbers defined over finite field, the same call returns `[Complex, Real, AbstractMod]`.
+
+It is relatively simple to print a complete type hierarchy
+
+```julia
+using AbstractTrees
+
+function AbstractTrees.children(t::Type)
+    t === Function ? Vector{Type}() : filter!(x -> x !== Any,subtypes(t))
+end
+AbstractTrees.printnode(io::IO,t::Type) = print(io,t)
+
+print_tree(Number)
+```
+
+The main role of abstract types is in function definitions. They allow us to define functions that can be used on variables with types with a given abstract type as a supertype. For example we can define a `sgn` function for **all** real numbers as
+
+```julia
+sgn(x::Real) = x > 0 ? 1 : x < 0 ? 
-1 : 0 +``` + +and we know it would be correct for all real numbers. This means that if anyone creates +a new subtype of `Real`, the above function can be used. This also means that +**it is expected** that comparison operations are defined for any real number. Also notice that +`Complex` numbers are excluded, since they do not have a total order. + +For unsigned numbers, the `sgn` can be simplified, as it is sufficient to verify if they are different (greater) than zero, therefore the function can read + +```julia +sgn(x::Unsigned) = x > 0 ? 1 : 0 +``` + +and again, it applies to all numbers derived from `Unsigned`. Recall that +`Unsigned <: Integer <: Real,` how does Julia decide, +which version of the function `sgn` to use for `UInt8(0)`? It chooses the most +specific version, and thus for `sgn(UInt8(0))` it will use `sgn(x::Unsinged)`. +If the compiler cannot decide, typically it encounters an ambiguity, it throws an error +and recommends which function you should define to resolve it. + +The above behavior allows to define default "fallback" implementations and while allowing +to specialize for sub-types. A great example is matrix multiplication, which has a +generic (and slow) implementation with many specializations, which can take advantage +of structure (sparse, banded), or use optimized implementations (e.g. blas implementation +for dense matrices with eltype `Float32` and `Float64`). + +::: tip Posit Numbers + +[Posit numbers](https://posithub.org/) are an alternative to IEEE764 format to store floating points (`Real`) numbers. +They offer higher precision around zero and wider dynamic range of representable numbers. +When julia performs matrix multiplication with `Float32` / `Float64`, it will use BLAS routines written in C, which are very performant. 
+ +```julia +julia> A = rand(Float32, 32, 32) + +julia> B = rand(Float32, 32, 16) + +julia> A * B + +julia> @which A*B +*(A::Union{LinearAlgebra.Adjoint{<:Any, <:StridedMatrix{var"#s994"}}, LinearAlgebra.Transpose{<:Any, <:StridedMatrix{var"#s994"}}, StridedMatrix{var"#s994"}} where var"#s994"<:Union{Float32, Float64}, B::Union{LinearAlgebra.Adjoint{<:Any, <:StridedMatrix{var"#s128"}}, LinearAlgebra.Transpose{<:Any, <:StridedMatrix{var"#s128"}}, StridedMatrix{var"#s128"}} where var"#s128"<:Union{Float32, Float64}) + @ LinearAlgebra ~/.julia/juliaup/julia-1.10.5+0.aarch64.apple.darwin14/share/julia/stdlib/v1.10/LinearAlgebra/src/matmul.jl:111 +``` + +We can now convert both matrices to posit representation. We can still multiply them, but the function which will now perform the multiplication is a generic multiplication. + +```julia +julia> using SoftPosit + +julia> A = Posit32.(A) + +julia> B = Posit32.(B) + +julia> A * B + +julia> @which A*B +*(A::AbstractMatrix, B::AbstractMatrix) + @ LinearAlgebra ~/.julia/juliaup/julia-1.10.5+0.aarch64.apple.darwin14/share/julia/stdlib/v1.10/LinearAlgebra/src/matmul.jl:104 +``` + +::: + +Again, Julia does not make a difference between abstract types defined in `Base` +libraries shipped with the language and those defined by you (the user). All are treated +the same. + +[From Julia documentation](https://docs.julialang.org/en/v1/manual/types/#man-abstract-types): +Abstract types cannot be instantiated, which means that we cannot create a variable that +would have an abstract type (try `typeof(Number(1f0))`). Also, abstract types cannot have +any fields, therefore there is no composition (there are lengthy discussions of why this is so, +one of the most definite arguments of creators is that abstract types with fields frequently lead +to children types not using some fields (consider circle vs. ellipse)). 
+
+### [Composite types](@id composite_types)
+
+Composite types are similar to `struct` in C (they even have the same memory layout) as they logically join together other types. It is not a great idea to think about them as objects (in OOP sense), because objects tie together *data* and *functions* on owned data. Contrary to that, in Julia (as in C), functions operate on data of structures, but are not tied to them and they are defined outside them. Composite types are workhorses of Julia's type system, as user-defined types are mostly composite (or abstract).
+
+Composite types are defined using `struct TypeName [fields] end`. To define a position of an animal on the Euclidean plane as a type, we would write
+
+```julia
+struct PositionF64
+    x::Float64
+    y::Float64
+end
+```
+
+which defines a structure with two fields `x` and `y` of type `Float64`. Julia's compiler creates a default constructor, where both (but generally all) arguments are converted using `(convert(Float64, x), convert(Float64, y))` to the correct type. This means that we can construct a PositionF64 with numbers of different type that are convertible to Float64, e.g. `PositionF64(1,1//2)` but we cannot construct `PositionF64` where the fields would be of different type (e.g. `Int`, `Float32`, etc.) or they are not trivially convertible (e.g. `String`).
+
+Fields in composite types do not have to have a specified type. We can define a `VaguePosition` without specifying the type
+
+```julia
+struct VaguePosition
+    x
+    y
+end
+```
+
+This works as the definition above except that the arguments are not converted to `Float64` now. One can store different values in `x` and `y`, for example `String` (e.g. VaguePosition("Hello","world")). Although the above definition might be convenient, it limits the compiler's ability to specialize, as the type `VaguePosition` does not carry information about type of `x` and `y`, which has a negative impact on the performance. Let's demonstrate it on an example of random walk. 
We implement a function `move`, which moves a position by a given step.
+
+```julia
+using BenchmarkTools
+
+move(a,b) = typeof(a)(a.x+b.x, a.y+b.y)
+
+δx = [PositionF64(rand(), rand()) for _ in 1:100]
+x₀ = PositionF64(rand(), rand())
+δy = [VaguePosition(rand(), rand()) for _ in 1:100]
+y₀ = VaguePosition(rand(), rand())
+
+@benchmark foldl(move, $(δx); init = $(x₀))
+@benchmark foldl(move, $(δy); init = $(y₀))
+```
+
+Giving fields of a composite type an abstract type does not really solve the problem of the compiler not knowing the type. In this example, it still does not know if it should use instructions for `Float64` or `Int8`. From the perspective of generating optimal code, the definition of `LessVaguePosition` and `VaguePosition` are equally uninformative to the compiler as it cannot assume anything about the code. However, the `LessVaguePosition` will ensure that the position will contain only numbers, hence catching trivial errors like instantiating `VaguePosition` with non-numeric types for which arithmetic operators will not be defined (recall the discussion at the beginning of the lecture).
+
+```julia
+struct LessVaguePosition
+    x::Real
+    y::Real
+end
+
+δz = [LessVaguePosition(rand(), rand()) for _ in 1:100]
+z₀ = LessVaguePosition(rand(), rand())
+
+@benchmark foldl(move, $(δz); init = $(z₀))
+nothing #hide
+```
+
+All structs defined above are immutable (as we have seen above in the case of `Tuple`), which means that one cannot change a field (unless the struct wraps a container, like an array, which allows that). For example this raises an error
+
+```julia
+a = LessVaguePosition(1,2)
+a.x = 2
+```
+
+If one needs to make a struct mutable, use the keyword `mutable` before the keyword `struct` as
+
+```julia
+mutable struct MutablePosition
+    x::Float64
+    y::Float64
+end
+```
+
+In mutable structures, we can change the values of fields. 
+
+```julia
+a = MutablePosition(1e0, 2e0)
+a.x = 2;
+a
+```
+
+::: tip Mutable and Non-Mutable structs
+
+The functional difference between those is that you are not allowed to change fields of non-mutable structures. Therefore when the structure needs to be changed, you need to *construct* a new structure, which means calling the constructor, where you can enforce certain properties (through an inner constructor).
+
+There is also a difference for the compiler. When a struct is non-mutable, the fields in memory store the actual values, and therefore they can be stored on the stack (this is possible only if all fields are of primitive types). The operations are fast, because there is no pointer dereferencing, there is no need to allocate memory (when they can be stored on the stack), and therefore less pressure on the Garbage Collector. When a struct is mutable, the structure contains a pointer to a memory location on the heap, which contains the value. This means to access the value, we first need to read the pointer to the memory location and then read the value (two fetches from the memory). Moreover, the value has to be stored on the heap, which means that we need to allocate memory during construction, which is expensive.
+
+:::
+
+The difference can be seen from
+
+```julia
+a, b = PositionF64(1,2), PositionF64(1,2)
+@code_native debuginfo=:none move(a,b)
+
+a, b = MutablePosition(1,2), MutablePosition(1,2)
+@code_native debuginfo=:none move(a,b)
+```
+
+Why is there just one addition?
+
+Also, the mutability is costly.
+
+```julia
+δx = [PositionF64(rand(), rand()) for _ in 1:100]
+x₀ = PositionF64(rand(), rand())
+δz = [MutablePosition(rand(), rand()) for _ in 1:100]
+z₀ = MutablePosition(rand(), rand())
+
+@benchmark foldl(move, $(δx); init = $(x₀))
+@benchmark foldl(move, $(δz); init = $(z₀))
+```
+
+### Parametric types
+
+So far, we had to trade off flexibility for generality in type definitions. 
We either have structured with a fixed type, which are fast, or we had structures with a general type information, but they are slow. Can we have both? The answer is affirmative. The way to achieve this **flexibility** in definitions of the type while being able to generate optimal code is to **parametrize** the type definition. This is achieved by replacing types with a parameter (typically a single uppercase character) and decorating in definition by specifying different type in curly brackets. For example + +```julia +struct PositionT{T} + x::T + y::T +end + +u64 = [PositionT(rand(), rand()) for _ in 1:100] +u32 = [PositionT(rand(Float32), rand(Float32)) for _ in 1:100] + +@benchmark foldl(move, $(u64)) +@benchmark foldl(move, $(u32)) +``` + +Notice that the compiler can take advantage of specializing for different types (which does not have an effect here as in modern processors addition of `Float` and `Int` takes the same time). + +```julia +v = [PositionT(Int8(rand(1:100)), Int8(rand(1:100))) for _ in 1:100]; +@benchmark reduce(move, v) +nothing #hide +``` + +The above definition suffers the same problem as `VaguePosition`, which is that it allows us to instantiate the `PositionT` with non-numeric types, e.g. `String`. We solve this by restricting the types `T` to be children of some supertype, in this case `Real` + +```julia +struct Position{T<:Real} + x::T + y::T +end +``` + +which will throw an error if we try to initialize it with `Position("1.0", "2.0")`. Notice the flexibility we have achieved. We can use `Position` to store (and later compute) not only over `Float32` / `Float64` but any real numbers defined by other packages, for example with `Posit`s. + +```julia +using SoftPosit +p32 = [Position(Posit32(rand(Float32)), Posit32(rand(Float32))) for _ in 1:100]; + +@benchmark foldl(move, $(p32)) +``` +The above test with `Posit` is slow, because there is no hardware support for their operations. 
+Trying to construct the `Position` with different type of real numbers will fail, example `Position(1f0,1e0),` because through type definition we enforce them to have equal type. + +Naturally, fields in structures can be of different types, as is in the below pointless example. + +```julia +struct PositionXY{X<:Real, Y<:Real} + x::X + y::Y +end +``` + +The type can be parametrized by a concrete types. This is useful to communicate the compiler some useful informations, for example size of arrays. + +```julia +struct PositionZ{T<:Real,Z} + x::T + y::T +end + +PositionZ{Int64,1}(1,2) +``` + +### Abstract parametric types + +Like Composite types, Abstract types can also have parameters. These parameters define types that are common for all child types. A very good example is Julia's definition of arrays of arbitrary dimension `N` and type `T` of its items as + +```julia +abstract type AbstractArray{T,N} end +``` + +Different `T` and `N` give rise to different variants of `AbstractArrays`, +therefore `AbstractArray{Float32,2}` is different from `AbstractArray{Float64,2}` +and from `AbstractArray{Float64,1}.` Note that these are still `Abstract` types, +which means you cannot instantiate them. Their purpose is + +* to allow to define operations for broad class of concrete types +* to inform the compiler about constant values, which can be used + +Notice in the above example that parameters of types do not have to be types, but can also be values of primitive types, as in the above example of `AbstractArray` `N` is the number of dimensions which is an integer value. 
+ +For convenience, it is common to give some important partially instantiated Abstract types an **alias**, for example `AbstractVector` as + +```julia +const AbstractVector{T} = AbstractArray{T,1} +``` + +is defined in `array.jl:23` (in Julia 1.6.2), which allows us to define for example general prescription for the `dot` product of two abstract vectors as + +```julia +function dot(a::AbstractVector, b::AbstractVector) + @assert length(a) == length(b) + mapreduce(*, +, a, b) +end +``` + +You can verify that the above general function can be compiled to performant code if +specialized for particular arguments. + +```julia +using InteractiveUtils: @code_native +@code_native debuginfo=:none mapreduce(*,+, [1,2,3], [1,2,3]) +``` + +## More on the use of types in function definitions + +### Terminology + +A *function* refers to a set of "methods" for a different combination of type parameters (the term function can be therefore considered as referring to a mere **name**). *Methods* define different behavior for different types of arguments for a given function. For example + +```julia +move(a::Position, b::Position) = Position(a.x + b.x, a.y + b.y) +move(a::Vector{<:Position}, b::Vector{<:Position}) = move.(a,b) +``` + +`move` refers to a function with methods `move(a::Position, b::Position)` and `move(a::Vector{<:Position}, b::Vector{<:Position})`. When different behavior on different types is defined by a programmer, as shown above, it is also called *implementation specialization*. There is another type of specialization, called *compiler specialization*, which occurs when the compiler generates different functions for you from a single method. For example for + +```julia +move(Position(1,1), Position(2,2)) +move(Position(1.0,1.0), Position(2.0,2.0)) +move(Position(Posit8(1),Posit8(1)), Position(Posit8(2),Posit8(2))) +``` + +### Frequent problems + +* Why does the following fail? 
+
+```julia
+foo(a::Vector{Real}) = println("Vector{Real}")
+foo([1.0,2,3])
+```
+
+Julia's type system is **invariant**, which means that `Vector{Real}` is different from `Vector{Float64}` and from `Vector{Float32}`, even though `Float64` and `Float32` are sub-types of `Real`. Therefore `typeof([1.0,2,3])` is `Vector{Float64}`, which is not a subtype of `Vector{Real}`. For **covariant** languages, this would be true. For more information on variance in computer languages, [see here](https://en.wikipedia.org/wiki/Covariance_and_contravariance_(computer_science)). If the above definition of `foo` should be applicable to all vectors which have elements of a subtype of `Real`, we have to define it as
+
+```julia
+foo(a::Vector{T}) where {T<:Real} = println("Vector{T} where {T<:Real}")
+```
+
+or equivalently but more tersely as
+
+```julia
+foo(a::Vector{<:Real}) = println("Vector{T} where {T<:Real}")
+```
+
+* **Diagonal rule** says that a repeated type in a method signature has to be a concrete type (this is to avoid ambiguity if the repeated type is used inside the function definition to define a new variable to change type of variables). Consider for example the function below
+
+```julia
+move(a::T, b::T) where {T<:Position} = T(a.x + b.x, a.y + b.y)
+```
+
+we cannot call it with `move(Position(1.0,2.0), Position(1,2))`, since in this case `Position(1.0,2.0)` is of type `Position{Float64}` while `Position(1,2)` is of type `Position{Int64}`.
+
+The **Diagonal rule** applies to parametric types as well.
+
+```julia
+move(a::Position{T}, b::Position{T}) where {T} = Position(a.x + b.x, a.y + b.y)
+```
+
+* When debugging why arguments do not match a particular method definition, it is useful to use `typeof`, `isa`, and `<:` commands. 
For example + +```julia +typeof(Position(1.0,2.0)) +typeof(Position(1,2)) +Position(1,2) isa Position{Float64} +Position(1,2) isa Position{Real} +Position(1,2) isa Position{<:Real} +typeof(Position(1,2)) <: Position{<:Float64} +typeof(Position(1,2)) <: Position{<:Real} +``` + +## Intermezzo: How does the Julia compiler work? + +Let's walk through an example. Consider the following definitions + +```julia +move(a::Position, by::Position) = Position(a.x + by.x, a.y + by.y) +move(a::T, by::T) where {T<:Position} = Position(a.x + by.x, a.y + by.y) +move(a::Position{Float64}, by::Position{Float64}) = Position(a.x + by.x, a.y + by.y) +move(a::Vector{<:Position}, by::Vector{<:Position}) = move.(a, by) +move(a::Vector{<:Position}, by::Position) = move.(a, by) +``` + +and a function call + +```julia +a = Position(1.0, 1.0) +by = Position(2.0, 2.0) +move(a, by) +``` + +1. The compiler knows that you call the function `move`. +2. The compiler infers the type of the arguments. You can view the result with `(typeof(a),typeof(by))` +3. The compiler identifies all `move`-methods with arguments of type `(Position{Float64}, Position{Float64})`: + `m = Base.method_instances(move, (typeof(a), typeof(by)), Base.get_world_counter())` +4. a) If the method has been specialized (compiled), then the arguments are prepared and the method is invoked. The compiled specialization can be seen from `m.cache` + +4. b) If the method has not been specialized (compiled), the method is compiled for the given type of arguments and continues as in step 4a. +A compiled function is therefore a "blob" of **native code** living in a particular memory location. When Julia calls a function, it needs to pick the right block corresponding to a function with particular type of parameters. 
+ +If the compiler cannot narrow the types of arguments to concrete types, it has to perform the above procedure inside the called function, which has negative effects on performance, as the type resolution and identification of the methods can be slow, especially for methods with many arguments (e.g. 30ns for a method with one argument, +100 ns for method with two arguments). **You always want to avoid run-time resolution inside the performant loop!!!** +Recall the above example + +```julia +wolfpack_a = [Wolf("1", 1), Wolf("2", 2), Wolf("3", 3)] +wolfpack_b = Any[Wolf("1", 1), Wolf("2", 2), Wolf("3", 3)] +@benchmark energy($(wolfpack_a)) +@benchmark energy($(wolfpack_b)) +``` + +An interesting intermediate between fully abstract and fully concrete type happens, when the compiler knows that arguments have abstract type, which is composed of a small number of concrete types. This case called Union-Splitting, which happens when there is just a little bit of uncertainty. Julia will do something like + +```julia +argtypes = typeof(args) +push!(execution_stack, args) + +if T == Tuple{Int, Bool} + @goto compiled_blob_1234 +else # the only other option is Tuple{Float64, Bool} + @goto compiled_blob_1236 +end +``` + +For example + +```julia +const WolfOrSheep = Union{Wolf, Sheep} +wolfpack_c = WolfOrSheep[Wolf("1", 1), Wolf("2", 2), Wolf("3", 3)] + +@benchmark energy($(wolfpack_c)) +``` + +Thanks to union splitting, Julia is able to have performant operations on arrays with undefined / missing values for example + +```julia +[1, 2, 3, missing] |> typeof +``` + +### More on matching methods and arguments + +In the above process, the step, where Julia looks for a method instance with corresponding parameters can be very confusing. The rest of this lecture will focus on this. 
For those who want to have a formal background, we recommend [talk of Francesco Zappa Nardelli](https://www.youtube.com/watch?v=Y95fAipREHQ) and / or the one of [Jan Vitek](https://www.youtube.com/watch?v=LT4AP7CUMAw). + +When Julia needs to specialize a method instance, it needs to find it among multiple definitions. A single function can have many method instances, see for example `methods(+)` which lists all method instances of the `+`-function. How does Julia select the proper one? + +1. It finds all methods where the type of arguments match or are subtypes of restrictions on arguments in the method definition. +2. If there are multiple matches, the compiler selects the most specific definition. + +3. If the compiler cannot decide, which method instance to choose, it throws an error. + +```julia +confused_move(a::Position{Float64}, by) = Position(a.x + by.x, a.y + by.y) +confused_move(a, by::Position{Float64}) = Position(a.x + by.x, a.y + by.y) +confused_move(Position(1.0,2.0), Position(1.0,2.0)) +``` + +4. If it cannot find a suitable method, it throws an error. + +```julia +move(Position(1,2), VaguePosition("hello","world")) +``` + +Some examples: Consider following definitions + +```julia +move(a::Position, by::Position) = Position(a.x + by.x, a.y + by.y) +move(a::T, by::T) where {T<:Position} = T(a.x + by.x, a.y + by.y) +move(a::Position{Float64}, by::Position{Float64}) = Position(a.x + by.x, a.y + by.y) +move(a::Vector{<:Position}, by::Vector{<:Position}) = move.(a, by) +move(a::Vector{T}, by::Vector{T}) where {T<:Position} = move.(a, by) +move(a::Vector{<:Position}, by::Position) = move.(a, by) +``` + +Which method will compiler select for + +```julia +move(Position(1.0,2.0), Position(1.0,2.0)) +``` + +The first three methods match the types of arguments, but the compiler will select the third one, since it is the most specific. 
+
+Which method will the compiler select for
+
+```julia
+move(Position(1,2), Position(1,2))
+```
+
+Again, the first and second method definitions match the argument, but the second is the most specific.
+
+Which method will the compiler select for
+
+```julia
+move([Position(1,2)], [Position(1,2)])
+```
+
+Again, the fourth and fifth method definitions match the argument, but the fifth is the most specific.
+
+```julia
+move([Position(1,2), Position(1.0,2.0)], [Position(1,2), Position(1.0,2.0)])
+```
+
+### A bizarre definition which you can encounter
+
+The following definition of a one-hot matrix is taken from [Flux.jl](https://github.com/FluxML/Flux.jl/blob/1a0b51938b9a3d679c6950eece214cd18108395f/src/onehot.jl#L10-L12)
+
+```julia
+struct OneHotArray{T<:Integer, L, N, var"N+1", I<:Union{T,AbstractArray{T, N}}} <: AbstractArray{Bool, var"N+1"}
+    indices::I
+end
+```
+
+The parameters of the type carry information about the type used to encode the position of `one` in each column in `T`, the dimension of one-hot vectors in `L`, the dimension of the storage of `indices` in `N` (which is zero for `OneHotVector` and one for `OneHotMatrix`), number of dimensions of the `OneHotArray` in `var"N+1"` and the type of underlying storage of indices `I`. 
+ + +[^1]: [Type Stability in Julia, Pelenitsyn et al., 2021](https://arxiv.org/pdf/2109.01950.pdf) diff --git a/docs_vitepress/src/lectures/lecture_02/pred-prey.png b/docs_vitepress/src/lectures/lecture_02/pred-prey.png new file mode 100644 index 00000000..e33e31ba Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_02/pred-prey.png differ diff --git a/docs_vitepress/src/lectures/lecture_03/Lab03Ecosystem.jl b/docs_vitepress/src/lectures/lecture_03/Lab03Ecosystem.jl new file mode 100644 index 00000000..b2de55a9 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_03/Lab03Ecosystem.jl @@ -0,0 +1,149 @@ +abstract type Species end + +abstract type PlantSpecies <: Species end +abstract type Grass <: PlantSpecies end + +abstract type AnimalSpecies <: Species end +abstract type Sheep <: AnimalSpecies end +abstract type Wolf <: AnimalSpecies end + +abstract type Agent{S<:Species} end + +# instead of Symbols we can use an Enum for the sex field +# using an Enum here makes things easier to extend in case you +# need more than just binary sexes and is also more explicit than +# just a boolean +@enum Sex female male + +########## World ############################################################# + +mutable struct World{A<:Agent} + agents::Dict{Int,A} + max_id::Int +end + +function World(agents::Vector{<:Agent}) + max_id = maximum(a.id for a in agents) + World(Dict(a.id=>a for a in agents), max_id) +end + +# optional: overload Base.show +function Base.show(io::IO, w::World) + println(io, typeof(w)) + for (_,a) in w.agents + println(io," $a") + end +end + + +########## Animals ########################################################### + +mutable struct Animal{A<:AnimalSpecies} <: Agent{A} + const id::Int + energy::Float64 + const Δenergy::Float64 + const reprprob::Float64 + const foodprob::Float64 + const sex::Sex +end + +function (A::Type{<:AnimalSpecies})(id::Int,E::T,ΔE::T,pr::T,pf::T,s::Sex) where T + Animal{A}(id,E,ΔE,pr,pf,s) +end + +# get the per 
species defaults back +randsex() = rand(instances(Sex)) +Sheep(id; E=4.0, ΔE=0.2, pr=0.8, pf=0.6, s=randsex()) = Sheep(id, E, ΔE, pr, pf, s) +Wolf(id; E=10.0, ΔE=8.0, pr=0.1, pf=0.2, s=randsex()) = Wolf(id, E, ΔE, pr, pf, s) + + +function Base.show(io::IO, a::Animal{A}) where {A<:AnimalSpecies} + e = a.energy + d = a.Δenergy + pr = a.reprprob + pf = a.foodprob + s = a.sex == female ? "♀" : "♂" + print(io, "$A$s #$(a.id) E=$e ΔE=$d pr=$pr pf=$pf") +end + +# note that for new species we will only have to overload `show` on the +# abstract species/sex types like below! +Base.show(io::IO, ::Type{Sheep}) = print(io,"🐑") +Base.show(io::IO, ::Type{Wolf}) = print(io,"🐺") + + +########## Plants ############################################################# + +mutable struct Plant{P<:PlantSpecies} <: Agent{P} + const id::Int + size::Int + const max_size::Int +end + +# constructor for all Plant{<:PlantSpecies} callable as PlantSpecies(...) +(A::Type{<:PlantSpecies})(id, s, m) = Plant{A}(id,s,m) +(A::Type{<:PlantSpecies})(id, m) = (A::Type{<:PlantSpecies})(id,rand(1:m),m) + +# default specific for Grass +Grass(id; max_size=10) = Grass(id, rand(1:max_size), max_size) + +function Base.show(io::IO, p::Plant{P}) where P + x = p.size/p.max_size * 100 + print(io,"$P #$(p.id) $(round(Int,x))% grown") +end + +Base.show(io::IO, ::Type{Grass}) = print(io,"🌿") + +function eat!(sheep::Animal{Sheep}, grass::Plant{Grass}, w::World) + sheep.energy += grass.size * sheep.Δenergy + grass.size = 0 +end + +########## Eating / Dying / Reproducing ######################################## + +function eat!(sheep::Animal{Sheep}, grass::Plant{Grass}, ::World) + sheep.energy += grass.size * sheep.Δenergy + grass.size = 0 +end +function eat!(wolf::Animal{Wolf}, sheep::Animal{Sheep}, w::World) + wolf.energy += sheep.energy * wolf.Δenergy + kill_agent!(sheep,w) +end + +kill_agent!(a::Agent, w::World) = delete!(w.agents, a.id) + +function find_mate(a::Animal, w::World) + ms = filter(x->mates(x,a), w.agents 
|> values |> collect) + isempty(ms) ? nothing : sample(ms) +end +mates(a::Animal{A}, b::Animal{A}) where A<:AnimalSpecies = a.sex != b.sex +mates(::Agent, ::Agent) = false + +function reproduce!(a::Animal{A}, w::World) where A + m = find_mate(a,w) + if !isnothing(m) + a.energy = a.energy / 2 + vals = [getproperty(a,n) for n in fieldnames(Animal) if n ∉ [:id, :sex]] + new_id = w.max_id + 1 + ŝ = Animal{A}(new_id, vals..., randsex()) + w.agents[ŝ.id] = ŝ + w.max_id = new_id + end +end + + + +########## Counting agents #################################################### + +agent_count(p::Plant) = p.size / p.max_size +agent_count(::Animal) = 1 +agent_count(as::Vector{<:Agent}) = sum(agent_count,as) + +function agent_count(w::World) + function op(d::Dict,a::Agent{S}) where S<:Species + n = nameof(S) + d[n] = haskey(d,n) ? d[n]+agent_count(a) : agent_count(a) + return d + end + reduce(op, w.agents |> values, init=Dict{Symbol,Float64}()) +end diff --git a/docs_vitepress/src/lectures/lecture_03/animals.jpg b/docs_vitepress/src/lectures/lecture_03/animals.jpg new file mode 100644 index 00000000..35bfa0e7 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_03/animals.jpg differ diff --git a/docs_vitepress/src/lectures/lecture_03/design_patterns.jpg b/docs_vitepress/src/lectures/lecture_03/design_patterns.jpg new file mode 100644 index 00000000..309ee687 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_03/design_patterns.jpg differ diff --git a/docs_vitepress/src/lectures/lecture_03/hw.md b/docs_vitepress/src/lectures/lecture_03/hw.md new file mode 100644 index 00000000..d73dd4a3 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_03/hw.md @@ -0,0 +1,88 @@ +# Homework 3 + +In this homework we will implement a function `find_food` and practice the use of closures. +The solution of lab 3 can be found +[here](https://github.com/JuliaTeachingCTU/Scientific-Programming-in-Julia/blob/2023W/docs/src/lecture_03/Lab03Ecosystem.jl). 
+You can use this file and add the code that you write for the homework to it. + +## How to submit? + +Put all your code (including your or the provided solution of lab 2) +in a script named `hw.jl`. Zip only this file (not its parent folder) and +upload it to BRUTE. + +```@setup block +include("Lab03Ecosystem.jl") + +function find_food(a::Animal, w::World) + as = filter(x -> eats(a,x), w.agents |> values |> collect) + isempty(as) ? nothing : rand(as) +end + +eats(::Animal{Sheep},g::Plant{Grass}) = g.size > 0 +eats(::Animal{Wolf},::Animal{Sheep}) = true +eats(::Agent,::Agent) = false + +function every_nth(f::Function, n::Int) + i = 1 + function callback(args...) + # display(i) # comment this out to see out the counter increases + if i == n + f(args...) + i = 1 + else + i += 1 + end + end +end +``` + +## Agents looking for food + +::: danger Homework + +Implement a method `find_food(a::Animal, w::World)` returns one randomly chosen +agent from all `w.agents` that can be eaten by `a` or `nothing` if no food could +be found. This means that if e.g. the animal is a `Wolf` you have to return one +random `Sheep`, etc. + +*Hint*: You can write a general `find_food` method for all animals and move the +parts that are specific to the concrete animal types to a separate function. +E.g. you could define a function `eats(::Animal{Wolf}, ::Animal{Sheep}) = true`, etc. + +You can check your solution with the public test: + +```@repl block +sheep = Sheep(1,pf=1.0) +world = World([Grass(2), sheep]) +find_food(sheep, world) isa Plant{Grass} +``` + +::: + +## Callbacks & Closures + +::: danger Homework + +Implement a function `every_nth(f::Function,n::Int)` that takes an inner +function `f` and uses a closure to construct an outer function `g` that only +calls `f` every `n`th call to `g`. For example, if `n=3` the inner function `f` be called +at the 3rd, 6th, 9th ... call to `g` (not at the 1st, 2nd, 4th, 5th, 7th... call). 
+ +**Hint**: You can use splatting via `...` to pass on an unknown number of +arguments from the outer to the inner function. + +::: + +You can use `every_nth` to log (or save) the agent count only every couple of +steps of your simulation. Using `every_nth` will look like this: + +```@repl block +w = World([Sheep(1), Grass(2), Wolf(3)]) +# `@info agent_count(w)` is executed only every 3rd call to logcb(w) +logcb = every_nth(w->(@info agent_count(w)), 3); + +logcb(w); # x->(@info agent_count(w)) is not called +logcb(w); # x->(@info agent_count(w)) is not called +logcb(w); # x->(@info agent_count(w)) *is* called +``` diff --git a/docs_vitepress/src/lectures/lecture_03/lab.md b/docs_vitepress/src/lectures/lecture_03/lab.md new file mode 100644 index 00000000..cd16c590 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_03/lab.md @@ -0,0 +1,383 @@ +# [Lab 3: Predator-Prey Agents](@id lab03) + +```@setup forward +include("../lecture_02/Lab02Ecosystem.jl") +``` + +In this lab we will look at two different ways of extending our agent +simulation to take into account that animals can have two different sexes: +*female* and *male*. + +In the first part of the lab you will re-use the code from [lab 2](@ref lab02) +and create a new type of sheep (`⚥Sheep`) which has an additional field *sex*. +In the second part you will redesign the type hierarchy from scratch using +parametric types to make this agent system much more flexible and *julian*. + + +## Part I: Female & Male Sheep + +The code from lab 2 that you will need in the first part of this lab can be +found [here](https://github.com/JuliaTeachingCTU/Scientific-Programming-in-Julia/blob/master/docs/src/lecture_02/Lab02Ecosystem.jl). + +The goal of the first part of the lab is to demonstrate the *forwarding method* +(which is close to how things are done in OOP) by implementing a sheep that can +have two different sexes and can only reproduce with another sheep of opposite sex. 
This new type of sheep needs an additional field
+ +::: + +::: details Show solution + +```@example forward +# NOTE: the @forward macro we will discuss in a later lecture is based on this + +function Base.getproperty(s::⚥Sheep, name::Symbol) + if name in fieldnames(Sheep) + getfield(s.sheep,name) + else + getfield(s,name) + end +end + +function Base.setproperty!(s::⚥Sheep, name::Symbol, x) + if name in fieldnames(Sheep) + setfield!(s.sheep,name,x) + else + setfield!(s,name,x) + end +end +``` + +::: + +You should be able to do the following with your overloads now +```@repl forward +sheep = ⚥Sheep(1) +sheep.id +sheep.sex +sheep.energy += 1 +sheep +``` + +In order to make the `⚥Sheep` work with the rest of the code we only have +to forward the `eat!` method + +```@repl forward +eat!(s::⚥Sheep, food, world) = eat!(s.sheep, food, world); +sheep = ⚥Sheep(1); +grass = Grass(2); +world = World([sheep,grass]) +eat!(sheep, grass, world) +``` + +and implement a custom `reproduce!` method with the behaviour that we want. + +However, the extension of `Sheep` to `⚥Sheep` is a very object-oriented approach. +With a little bit of rethinking, we can build a much more elegant solution that +makes use of Julia's powerful parametric types. + +## Part II: A new, parametric type hierarchy + +First, let us note that there are two fundamentally different types of agents in +our world: animals and plants. All species such as grass, sheep, wolves, etc. +can be categorized as one of those two. We can use Julia's powerful, +*parametric* type system to define one large abstract type for all agents +`Agent{S}`. The `Agent` will either be an `Animal` or a `Plant` with a type +parameter `S` which will represent the specific animal/plant +species we are dealing with. 
This new type hierarchy can then look like this:
+You can get the pretty (unicode) printing of the `Species` types with +another overload like this: `Base.show(io::IO, ::Type{Sheep}) = print(io,"🐑")` + +::: + +::: details Show solution + +```@example parametric +function Base.show(io::IO, a::Animal{A}) where {A<:AnimalSpecies} + e = a.energy + d = a.Δenergy + pr = a.reprprob + pf = a.foodprob + s = a.sex == female ? "♀" : "♂" + print(io, "$A$s #$(a.id) E=$e ΔE=$d pr=$pr pf=$pf") +end + +# note that for new species/sexes we will only have to overload `show` on the +# abstract species types like below! +Base.show(io::IO, ::Type{Sheep}) = print(io,"🐑") +Base.show(io::IO, ::Type{Wolf}) = print(io,"🐺") +``` + +::: + +Unfortunately we have lost the convenience of creating plants and animals +by simply calling their species constructor. For example, `Sheep` is just an +abstract type that we cannot instantiate. However, we can manually define +a new constructor that will give us this convenience back. +This is done in exactly the same way as defining a constructor for a concrete type: + +```julia +Sheep(id,E,ΔE,pr,pf,s=rand(Sex)) = Animal{Sheep}(id,E,ΔE,pr,pf,s) +``` + +Ok, so we have a constructor for `Sheep` now. But what about all the other +billions of species that you want to define in your huge master thesis project of +ecosystem simulations? Do you have to write them all by hand? *Do not +despair!* Julia has you covered. + +::: warning Exercise + +Overload all `AnimalSpecies` types with a constructor. +You already know how to write constructors for specific types such as `Sheep`. +Can you manage to sneak in a type variable? Maybe with `Type`? 
+ +::: + +::: details Show solution + +```@example parametric +function (A::Type{<:AnimalSpecies})(id::Int,E::T,ΔE::T,pr::T,pf::T,s::Sex) where T + Animal{A}(id,E,ΔE,pr,pf,s) +end + +# get the per species defaults back +randsex() = rand(instances(Sex)) +Sheep(id; E=4.0, ΔE=0.2, pr=0.8, pf=0.6, s=randsex()) = Sheep(id, E, ΔE, pr, pf, s) +Wolf(id; E=10.0, ΔE=8.0, pr=0.1, pf=0.2, s=randsex()) = Wolf(id, E, ΔE, pr, pf, s) +nothing # hide +``` + +::: + +We have our convenient, high-level behaviour back! + +```@repl parametric +Sheep(1) +Wolf(2) +``` + +::: warning Exercise + +Check the methods for `eat!` and `kill_agent!` which involve `Animal`s and update +their type signatures such that they work for the new type hiearchy. + +::: + +::: details Show solution + +```@example parametric +function eat!(wolf::Animal{Wolf}, sheep::Animal{Sheep}, w::World) + wolf.energy += sheep.energy * wolf.Δenergy + kill_agent!(sheep,w) +end + +# no change +# eat!(::Animal, ::Nothing, ::World) = nothing + +# no change +# kill_agent!(a::Agent, w::World) = delete!(w.agents, a.id) + +eats(::Animal{Wolf},::Animal{Sheep}) = true +eats(::Agent,::Agent) = false +# this one needs to wait until we have `Plant`s +# eats(::Animal{Sheep},g::Plant{Grass}) = g.size > 0 + +nothing # hide +``` + +::: + +::: warning Exercise + +Finally, we can implement the new behaviour for `reproduce!` which we wanted. +Build a function which first finds an animal species of opposite sex and then +lets the two reproduce (same behaviour as before). + +::: + +::: details Show solution + +```@example parametric +mates(a::Animal{A}, b::Animal{A}) where A<:AnimalSpecies = a.sex != b.sex +mates(::Agent, ::Agent) = false + +function find_mate(a::Animal, w::World) + ms = filter(x->mates(x,a), w.agents |> values |> collect) + isempty(ms) ? 
nothing : rand(ms) +end + +function reproduce!(a::Animal{A}, w::World) where {A} + m = find_mate(a,w) + if !isnothing(m) + a.energy = a.energy / 2 + vals = [getproperty(a,n) for n in fieldnames(Animal) if n ∉ [:id, :sex]] + new_id = w.max_id + 1 + ŝ = Animal{A}(new_id, vals..., randsex()) + w.agents[ŝ.id] = ŝ + w.max_id = new_id + end +end +nothing # hide +``` + +::: + +```@repl parametric +s1 = Sheep(1, s=female) +s2 = Sheep(2, s=male) +w = World([s1, s2]) +reproduce!(s1, w); w +``` + +::: warning Exercise + +Implement the type hiearchy we designed for `Plant`s as well. + +::: + +::: details Show solution + +```@example parametric +mutable struct Plant{P<:PlantSpecies} <: Agent{P} + id::Int + size::Int + max_size::Int +end + +# constructor for all Plant{<:PlantSpecies} callable as PlantSpecies(...) +(A::Type{<:PlantSpecies})(id, s, m) = Plant{A}(id,s,m) +(A::Type{<:PlantSpecies})(id, m) = (A::Type{<:PlantSpecies})(id,rand(1:m),m) + +# default specific for Grass +Grass(id; max_size=10) = Grass(id, rand(1:max_size), max_size) + +function Base.show(io::IO, p::Plant{P}) where P + x = p.size/p.max_size * 100 + print(io,"$P #$(p.id) $(round(Int,x))% grown") +end + +Base.show(io::IO, ::Type{Grass}) = print(io,"🌿") + +function eat!(sheep::Animal{Sheep}, grass::Plant{Grass}, w::World) + sheep.energy += grass.size * sheep.Δenergy + grass.size = 0 +end +eats(::Animal{Sheep},g::Plant{Grass}) = g.size > 0 + +nothing # hide +``` + +::: + +```@repl parametric +g = Grass(2) +s = Sheep(3) +w = World([g,s]) +eat!(s,g,w); w +``` diff --git a/docs_vitepress/src/lectures/lecture_03/lecture.md b/docs_vitepress/src/lectures/lecture_03/lecture.md new file mode 100644 index 00000000..f96a57df --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_03/lecture.md @@ -0,0 +1,660 @@ +# Design patterns: good practices and structured thinking + +Every software developer has a desire to write better code. A desire +to improve system performance. 
A desire to design software that is easy to maintain, easy to understand and explain. + +Design patterns are recommendations and good practices accumulating knowledge of experienced programmers. + +The highest level of experience contains the design guiding principles: +- **SOLID:** Single Responsibility, Open/Closed, Liskov Substitution, Interface +- Segregation, Dependency Inversion +- **DRY:** Don't Repeat Yourself +- **KISS:** Keep It Simple, Stupid! +- **POLA:** Principle of Least Astonishment +- **YAGNI:** You Aren't Gonna Need It (overengineering) +- **POLP:** Principle of Least Privilege + +While these high-level concepts are intuitive, they are too general to give specific answers. + +More detailed patterns arise for programming paradigms (declarative, imperative) with specific instances of functional or object-oriented programming. + +The concept of design patterns originates in the OOP paradigm. OOP defines a strict way how to write software. Sometimes it is not clear how to squeeze real world problems into those rules. Cookbook for many practical situations + +- Gamma, E., Johnson, R., Helm, R., Johnson, R. E., & Vlissides, J. (1995). Design patterns: elements of reusable object-oriented software. Pearson Deutschland GmbH. + +Defining 23 design patterns in three categories. Became extremely popular. + +![](design_patterns.jpg) +(C) Scott Wlaschin + + +Is julia OOP or FP? It is different from both, based on: + +- types system (polymorphic) +- multiple dispatch (extending single dispatch of OOP) +- functions as first class +- decoupling of data and functions +- macros + +Any guidelines to solve real-world problems? + +- Hands-On Design Patterns and Best Practices with Julia Proven solutions to common problems in software design for Julia 1.x Tom Kwong, CFA + + +Fundamental tradeoff: rules vs. 
freedom + +- freedom: in the C language it is possible to access assembler instructions, use pointer arithmetics: + - it is possible to write extremely efficient code + - it is easy to segfault, leak memory, etc. +- rules: in strict languages (strict OOP, strict functional programming) you lose freedom for certain guarantees: + - e.g. strict functional programming guarantees that the program provably terminates + - operations that are simple e.g. in pointer arithmetics may become clumsy and inefficient in those strict rules. + - the compiler can validate the rules and complain if the code does not comply with them. + +Julia is again a dance between freedom and strict rules. It is more inclined to freedom. +Provides few simple concepts that allow to construct design patterns common in other languages. + +- the language does not enforce too many formalisms (via keywords (interface, trait, etc.) but they can be + - the compiler cannot check for correctness of these "patterns" + - the user has a lot of freedom (and responsibility) +- lots of features can be added by Julia packages (with various level of comfort) + - macros + +Read: + +## Design Patterns of OOP from the Julia viewpoint + +OOP is currently very popular concept (C++, Java, Python). It has strengths and weaknesses. The Julia authors tried to keep the strength and overcome weaknesses. + +Key features of OOP: + +- Encapsulation +- Inheritance +- Polymorphism + +Classical OOP languages define classes that bind processing functions to the data. Virtual methods are defined only for the attached methods of the classes. + +::: tip Encapsulation + +Refers to bundling of data with the methods that operate on that data, or the restricting of direct access to some of an object's components. 
Encapsulation is used to hide the values or state of a structured data object inside a class, preventing direct access to them by clients in a way that could expose hidden implementation details or violate state invariance maintained by the methods. + +::: + +::: tip Making Julia to mimic OOP + +There are many discussions how to make Julia to behave like an OOP. The best implementation to our knowledge is [ObjectOriented](https://github.com/Suzhou-Tongyuan/ObjectOriented.jl) + +::: + +### Encapsulation Advantage: Consistency and Validity + +With fields of data structure freely accessible, the information may become inconsistent. + +```julia +mutable struct Grass <: Plant + id::Int + size::Int + max_size::Int +end +``` + +What if I create Grass with larger size than ```max_size```? + +```julia +grass = Grass(1,50,5) +``` + +Freedom over Rules. Maybe I would prefer to introduce some rules. + +Some encapsulation may be handy keeping it consistent. Julia has ```inner constructor```. + +```julia +mutable struct Grass2 <: Plant + id::Int + size::Int + max_size::Int + Grass2(id,sz,msz) = sz > msz ? error("size can not be greater that max_size") : new(id,sz,msz) +end +``` + +When defined, Julia does not provide the default outer constructor. + +But fields are still accessible: + +```julia +grass.size = 10000 +``` + +Recall that `grass.size=1000` is a syntax of `setproperty!(grass,:size,1000)`, which can be redefined: + +```julia +function Base.setproperty!(obj::Grass, sym::Symbol, val) + if sym==:size + @assert val<=obj.max_size "size have to be lower than max_size!" + end + setfield!(obj,sym,val) +end +``` + +Function `setfield!` can not be overloaded. + +Julia has *partial encapsulation* via a mechanism for consistency checks. + +::: warning Array in imutable struct can be mutated + +The mutability applies to the structure and not to encapsulated structures. 
+ +```julia +struct Foo + x::Float64 + y::Vector{Float64} + z::Dict{Int,Int} +end +``` + +In the structure `Foo`, `x` cannot be mutated, but fields of `y` and key-value pairs of `z` can be mutated, because they are mutable containers. But I cannot replace `y` with a different `Vector`. + +::: + +### Encapsulation Disadvantage: the Expression Problem + +Encapsulation limits the operations I can do with an object. Sometimes too much. Consider a matrix of methods/types(data-structures) + +Consider an existing matrix of data and functions: + +| data \ methods | find_food | eat! | grow! | +| :-- | --- | --- | --- | +| Wolf | | | | +| Sheep | | | | +| Grass | | | | + +You have a good reason not to modify the original source (maintenance). + +Imagine we want to extend the world to use new animals and new methods for all animals. + +Object-oriented programming + +- classes are primary objects (hierarchy) +- define animals as classes ( inheriting from abstract class) +- adding a new animal is easy +- adding a new method for all animals is hard (without modifying the original code) + +Functional programming + +- functions are primary +- define operations ```find_food```, ```eat!``` +- adding a new operation is easy +- adding new data structure to existing operations is hard + +Solutions: + +1. multiple-dispatch = julia +2. open classes (monkey patching) = add methods to classes on the fly +3. visitor pattern = partial fix for OOP [extended visitor pattern using dynamic_cast] + +### Morale: + +- Julia does not enforces creation getters/setters by default (setproperty is mapped to setfield) +- it provides tools to enforce access restriction if the user wants it. 
+- can be used to imitate objects: +https://stackoverflow.com/questions/39133424/how-to-create-a-single-dispatch-object-oriented-class-in-julia-that-behaves-l/39150509#39150509 + +## Polymorphism: + +::: tip Polymorphism in OOP + +Polymorphism is the method in an object-oriented programming language that performs different things as per the object’s class, which calls it. With Polymorphism, a message is sent to multiple class objects, and every object responds appropriately according to the properties of the class. + +::: + +Example animals of different classes make different sounds. In Python: + +```python +class Sheep: + def __init__(self, energy, Denergy): + self.energy = energy + self.Denergy = Denergy + + def make_sound(self): + print("Baa") + +sheep.make_sound() +wolf.make_sound() +``` + +Will make distinct sounds (baa, Howl). + +Can we achieve this in Julia? + +```julia +make_sound(::Sheep) = println("Baa") +make_sound(::Wolf) = println("Howl") +``` + +::: tip Implementation of virtual methods + +Virtual methods in OOP are typically implemented using Virtual Method Table, one for each class. +![](vtable.gif) + +Julia has a single method table. Dispatch can be either static or dynamic (slow). + +::: + +*Freedom* vs. Rules. + +- Duck typing is a type of polymorphism without static types + - more programming freedom, less formal guarantees +- julia does not check if ```make_sound``` exists for all animals. May result in `MethodError`. Responsibility of a programmer. + - define ```make_sound(A::AbstractAnimal)``` + +So far, the polymorphism coincides for OOP and julia because the method had only one argument => single argument dispatch. + +Multiple dispatch is an *extension* of the classical first-argument-polymorphism of OOP, to all-argument polymorphism. + + +::: tip Challenge for OOP + +How to code polymorphic behavior of interaction between two agents, e.g. an agent eating another agent in OOP? + +Complicated.... You need a "design pattern" for it. 
+ +::: + +```python +class Sheep(Animal): + energy: float = 4.0 + denergy: float = 0.2 + reprprob: float = 0.5 + foodprob: float = 0.9 + + # hard, if not impossible to add behaviour for a new type of food + def eat(self, a: Agent, w: World): + if isinstance(a, Grass) + self.energy += a.size * self.denergy + a.size = 0 + else: + raise ValueError(f"Sheep cannot eat {type(a).__name__}.") +``` + +Consider an extension to: +- Flower : easy +- PoisonousGrass: harder + +Simple in Julia: + +```julia +eat!(w1::Sheep, a::Grass, w::World)= +eat!(w1::Sheep, a::Flower, w::World)= +eat!(w1::Sheep, a::PoisonousGrass, w::World)= +``` + +Boiler-plate code can be automated by macros / meta programming. + +## Inheritance + +::: tip Inheritance + +Is the mechanism of basing one object or class upon another object (prototype-based inheritance) or class (class-based inheritance), retaining similar implementation. Deriving new classes (sub classes) from existing ones such as super class or base class and then forming them into a hierarchy of classes. In most class-based object-oriented languages, an object created through inheritance, a "child object", acquires all the properties and behaviors of the "parent object" , with the exception of: constructors, destructor, overloaded operators. + +::: + +Most commonly, the sub-class inherits methods and the data. + +For example, in python we can design a sheep with additional field. +Think of a situation that we want to refine the reproduction procedure for sheeps by considering differences for male and female. We do not have information about gender in the original implementation. + +In OOP, we can use *inheritance*. 
+ +```python +class Sheep: + def __init__(self, energy, Denergy): + self.energy = energy + self.Denergy = Denergy + + def make_sound(self): + print("Baa") + +class SheepWithGender(Sheep): + def __init__(self, energy, Denergy,gender): + super().__init__(energy, Denergy) + self.gender = gender + # make_sound is inherited + +# Can you do this in Julia?! +``` + +Simple answer: NO, not exactly + +- Sheep has fields, is a **concrete** type, we cannot extend it. + - with modification of the original code, we can define AbstractSheep with subtypes Sheep and SheepWithGender. +- But methods for AbstractAnimal works for sheeps! Is this inheritance? + + +### Inheritance vs. Subtyping + +Subtle difference: + +- subtyping = equality of interface +- inheritance = reuse of implementation + +In practice, subtyping reuse methods, *not* data fields. + +We have seen this in Julia, using type hierarchy: + +- ```agent_step!(a::Animal, w::World)``` +- all animals subtype of ```Animal``` "inherit" this method. + +The type hierarchy is only one way of subtyping. Julia allows many variations, e.g. concatenating different parts of hierarchies via the ```Union{}``` type: + +```julia +fancy_method(O::Union{Sheep,Grass}) = println("Fancy") +``` + +Is this a good idea? It can be done completely Ad-hoc! Freedom over Rules. + +There are very good use-cases: + +- Missing values: +```x::AbstractVector{<:Union{<:Number, Missing}}``` + +::: danger SubTyping issues + +With parametric types, unions and other construction, subtype resolution may become a complicated problem. Julia can even crash. 
+[Jan Vitek's Keynote at JuliaCon 2021](https://www.youtube.com/watch?v=LT4AP7CUMAw) + +::: + +### Sharing of data field via composition +Composition is also recommended in OOP: [Composition over ingeritance](https://en.wikipedia.org/wiki/Composition_over_inheritance) + +```julia +struct ⚥Sheep <: Animal + sheep::Sheep + sex::Symbol +end +``` + +If we want our new ⚥Sheep to behave like the original Sheep, we need to *forward* the corresponding methods. + +```julia +eat!(a::⚥Sheep, b::Grass, w::World)=eat!(a.sheep, b, w) +``` +and all other methods. Routine work. Boring! +The whole process can be automated using macros ```@forward``` from Lazy.jl. + + +Why so complicated? Wasn't the original inheritance tree structure better? + +- multiple inheritance: + - you just compose two different "trees". + - common example with ArmoredVehicle = Vehicle + Weapon +- Do you think there is only one sensible inheritance tree? + +::: tip Animal World + +Think of an inheritance tree of a full scope Animal world. + +Idea #1: Split animals by biological taxonomy +![](animals.jpg) + +Hold on. +- Sharks and dolphins can swim very well! +- Both bats and birds fly similarly! + +Idea #2: Split by the way they move! + +Idea #3: Split by way of ... + +::: + +In fact, we do not have a tree, but more like a matrix/tensor: + +| | swims | flies | walks | +| :-- | --- | --- | --- | +| birds | penguin | eagle | kiwi | +| mammal | dolphin | bat | sheep, wolf| +| insect | backswimmer | fly | beetle | + +Single type hierarchy will not work. Other approaches: + + - interfaces + - parametric types + + Analyze what features of animals are common and *compose* the animal: +```julia +abstract type HeatType end +abstract type MovementType end +abstract type ChildCare end + +mutable struct Animal{H<:HeatType,M<:MovementType,C<:ChildCare} + id::Int + ... +end +``` + +Now, we can define methods dispatching on parameters of the main type. + +Composition is simpler in such a general case. 
Composition over inheritance. + +A simple example of parametric approach will be demonstarted in the lab. + +## Interfaces: inheritance/subtyping without a hierarchy tree + +In OOP languages such as Java, interfaces have a dedicated keyword such that compiler can check correctness of the interface implementation. + +In Julia, interfaces can be achieved by defining ordinary functions. Not so strict validation by the compiler as in other languages. Freedom... + +### Example: Iterators + +Many fundamental objects can be iterated: Arrays, Tuples, Data collections... + +- They do not have any common "predecessor". They are almost "primitive" types. +- they share just the property of being iterable +- we do not want to modify them in any way + +Example: of interface ```Iterators``` +defined by "duck typing" via two functions. + +|Required methods | Brief description | +| --- | --- | +|iterate(iter) | Returns either a tuple of the first item and initial state or nothing if empty | +|iterate(iter, state) | Returns either a tuple of the next item and next state or nothing if no items remain | + +Defining these two methods for any object/collection ```C``` will make the following work: + +```julia +for o in C + # do something +end +``` + +- The compiler will not check if both functions exist. +- If one is missing, it will complain about it when it needs it +- The error message may be less informative than in the case of formal definition + +Note: + +- even iterators may have different features: they can be finite or infinite +- for finite iterators we can define useful functions (```collect```) +- how to pass this information in an extensible way? + +Poor solution: if statements. + +```julia +function collect(iter) + if iter isa Tuple... + +end +``` + +The compiler can do that for us. + +## Traits: cherry picking subtyping + +Trait mechanism in Julia is build using the existing tools: Type System and Multiple Dispatch. 
+ +Traits have a few key parts: + +- Trait types: the different traits a type can have. +- Trait function: what traits a type has. +- Trait dispatch: using the traits. + +From iterators: + +```julia +# trait types: + +abstract type IteratorSize end +struct SizeUnknown <: IteratorSize end +struct HasLength <: IteratorSize end +struct IsInfinite <: IteratorSize end + +# Trait function: Input is a Type, output is a Type +IteratorSize(::Type{<:Tuple}) = HasLength() +IteratorSize(::Type) = HasLength() # HasLength is the default + +# ... + +# Trait dispatch +BitArray(itr) = gen_bitarray(IteratorSize(itr), itr) +gen_bitarray(isz::IteratorSize, itr) = gen_bitarray_from_itr(itr) +gen_bitarray(::IsInfinite, itr) = throw(ArgumentError("infinite-size iterable used in BitArray constructor")) +``` + +What is needed to define for a new type that I want to iterate over? + +Do you still miss inheritance in the OOP style? + +Many packages automating this with more structure: [Traitor.jl](https://github.com/andyferris/Traitor.jl), [SimpleTraits.jl](https://github.com/mauro3/SimpleTraits.jl), [BinaryTraits.jl](https://github.com/tk3369/BinaryTraits.jl) + +## Functional tools: Partial evaluation + +It is common to create a new function which "just" specify some parameters. + +```julia +_prod(x) = reduce(*,x) +_sum(x) = reduce(+,x) +``` + +## Functional tools: Closures + +::: tip Closure (lexical closure, function closure) + +A technique for implementing lexically scoped name binding in a language with first-class functions. Operationally, a closure is a record storing a function together with an environment. + +::: + +- originates in functional programming +- now widespread in many common languages, Python, Matlab, etc.. +- memory management relies on garbage collector in general (can be optimized by compiler) + +### Example + +```julia +function adder(x) + return y->x+y +end +``` + +creates a function that "closes" the argument ```x```. Try: ```f=adder(5); f(3)```. 
+ +```julia +x = 30; + +function adder() + return y->x+y +end +``` + +creates a function that "closes" variable ```x```. + +```julia +f = adder(10) +f(1) + +g = adder() +g(1) +``` + +Such function can be passed as an argument: *together* with the closed data. + +### Implementation of closures in julia: documentation + +Closure is a record storing a function together with an environment. The environment is a mapping associating each *free* variable of the function (variables that are used locally, but defined in an enclosing scope) with the value or reference to which the name was bound when the closure was created. + +```julia +function adder(x) + return y->x+y +end +``` + +is lowered to (roughly): + +```julia +struct ##1{T} + x::T +end + +(_::##1)(y) = _.x + y + +function adder(x) + return ##1(x) +end +``` + +Note that the structure ##1 is not directly accessible. Try ```f.x``` and ```g.x```. + +### Functor = Function-like structure + +Each structure can have a method that is invoked when called as a function. + +```julia +(_::Sheep)()= println("🐑") + +``` +You can think of it as ```sheep.default_method()```. + + +### Coding style + +From ```Flux.jl```: + +```julia +function train!(loss, ps, data, opt; cb = () -> ()) + ps = Params(ps) + cb = runall(cb) + @progress for d in data + gs = gradient(ps) do + loss(batchmemaybe(d)...) + end + update!(opt, ps, gs) + cb() + end +end +``` + +Is this confusing? What can ```cb()``` do and what it can not? + +Note that function ```train!``` does not have many local variables. The important ones are arguments, i.e. exist in the scope from which the function was invoked. + +```julia +loss(x,y)=mse(model(x),y) +cb() = @info "training" loss(x,y) +train!(loss, ps, data, opt; cb=cb) +``` + +## Usage + +Usage of closures: + +- callbacks: the function can also modify the enclosed variable. 
+- abstraction: partial evaluation + +::: danger Beware: Performance of captured variables + +Inference of types may be difficult in closures: https://github.com/JuliaLang/julia/issues/15276 + +::: + +## Aditional materials + * [Functional design patterns](https://www.youtube.com/watch?v=srQt1NAHYC0) diff --git a/docs_vitepress/src/lectures/lecture_03/pred-prey.png b/docs_vitepress/src/lectures/lecture_03/pred-prey.png new file mode 100644 index 00000000..0e778ec7 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_03/pred-prey.png differ diff --git a/docs_vitepress/src/lectures/lecture_03/vtable.gif b/docs_vitepress/src/lectures/lecture_03/vtable.gif new file mode 100644 index 00000000..23a75e4a Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_03/vtable.gif differ diff --git a/docs_vitepress/src/lectures/lecture_04/Lab04Ecosystem.jl b/docs_vitepress/src/lectures/lecture_04/Lab04Ecosystem.jl new file mode 100644 index 00000000..10d4313c --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_04/Lab04Ecosystem.jl @@ -0,0 +1,195 @@ +using StatsBase + +abstract type Species end + +abstract type PlantSpecies <: Species end +abstract type Grass <: PlantSpecies end + +abstract type AnimalSpecies <: Species end +abstract type Sheep <: AnimalSpecies end +abstract type Wolf <: AnimalSpecies end + +abstract type Agent{S<:Species} end + +# instead of Symbols we can use an Enum for the sex field +# using an Enum here makes things easier to extend in case you +# need more than just binary sexes and is also more explicit than +# just a boolean +@enum Sex female male + +########## World ############################################################# + +mutable struct World{A<:Agent} + agents::Dict{Int,A} + max_id::Int +end + +function World(agents::Vector{<:Agent}) + max_id = maximum(a.id for a in agents) + World(Dict(a.id=>a for a in agents), max_id) +end + +# optional: overload Base.show +function Base.show(io::IO, w::World) + println(io, typeof(w)) + 
for (_,a) in w.agents + println(io," $a") + end +end + + +########## Animals ########################################################### + +mutable struct Animal{A<:AnimalSpecies} <: Agent{A} + const id::Int + energy::Float64 + const Δenergy::Float64 + const reprprob::Float64 + const foodprob::Float64 + const sex::Sex +end + +function (A::Type{<:AnimalSpecies})(id::Int,E::T,ΔE::T,pr::T,pf::T,s::Sex) where T + Animal{A}(id,E,ΔE,pr,pf,s) +end + +# get the per species defaults back +randsex() = rand(instances(Sex)) +Sheep(id; E=4.0, ΔE=0.2, pr=0.8, pf=0.6, s=randsex()) = Sheep(id, E, ΔE, pr, pf, s) +Wolf(id; E=10.0, ΔE=8.0, pr=0.1, pf=0.2, s=randsex()) = Wolf(id, E, ΔE, pr, pf, s) + + +function Base.show(io::IO, a::Animal{A}) where {A<:AnimalSpecies} + e = a.energy + d = a.Δenergy + pr = a.reprprob + pf = a.foodprob + s = a.sex == female ? "♀" : "♂" + print(io, "$A$s #$(a.id) E=$e ΔE=$d pr=$pr pf=$pf") +end + +# note that for new species we will only have to overload `show` on the +# abstract species/sex types like below! +Base.show(io::IO, ::Type{Sheep}) = print(io,"🐑") +Base.show(io::IO, ::Type{Wolf}) = print(io,"🐺") + + +########## Plants ############################################################# + +mutable struct Plant{P<:PlantSpecies} <: Agent{P} + const id::Int + size::Int + const max_size::Int +end + +# constructor for all Plant{<:PlantSpecies} callable as PlantSpecies(...) 
+(A::Type{<:PlantSpecies})(id, s, m) = Plant{A}(id,s,m) +(A::Type{<:PlantSpecies})(id, m) = (A::Type{<:PlantSpecies})(id,rand(1:m),m) + +# default specific for Grass +Grass(id; max_size=10) = Grass(id, rand(1:max_size), max_size) + +function Base.show(io::IO, p::Plant{P}) where P + x = p.size/p.max_size * 100 + print(io,"$P #$(p.id) $(round(Int,x))% grown") +end + +Base.show(io::IO, ::Type{Grass}) = print(io,"🌿") + + +########## Eating / Dying / Reproducing ######################################## + +function eat!(wolf::Animal{Wolf}, sheep::Animal{Sheep}, w::World) + wolf.energy += sheep.energy * wolf.Δenergy + kill_agent!(sheep,w) +end +function eat!(sheep::Animal{Sheep}, grass::Plant{Grass}, ::World) + sheep.energy += grass.size * sheep.Δenergy + grass.size = 0 +end +eat!(::Animal, ::Nothing, ::World) = nothing + +kill_agent!(a::Agent, w::World) = delete!(w.agents, a.id) + +function find_mate(a::Animal, w::World) + ms = filter(x->mates(x,a), w.agents |> values |> collect) + isempty(ms) ? nothing : sample(ms) +end +mates(a::Animal{A}, b::Animal{A}) where A<:AnimalSpecies = a.sex != b.sex +mates(::Agent, ::Agent) = false + +function reproduce!(a::Animal{A}, w::World) where A + m = find_mate(a,w) + if !isnothing(m) + a.energy = a.energy / 2 + vals = [getproperty(a,n) for n in fieldnames(Animal) if n ∉ [:id, :sex]] + new_id = w.max_id + 1 + ŝ = Animal{A}(new_id, vals..., randsex()) + w.agents[ŝ.id] = ŝ + w.max_id = new_id + end +end + +# finding food / who eats who +function find_food(a::Animal, w::World) + as = filter(x -> eats(a,x), w.agents |> values |> collect) + isempty(as) ? 
nothing : sample(as) +end +eats(::Animal{Sheep},g::Plant{Grass}) = g.size > 0 +eats(::Animal{Wolf},::Animal{Sheep}) = true +eats(::Agent,::Agent) = false + + +########## Stepping through time ############################################# + +function agent_step!(p::Plant, ::World) + if p.size < p.max_size + p.size += 1 + end +end +function agent_step!(a::Animal, w::World) + a.energy -= 1 + if rand() <= a.foodprob + dinner = find_food(a,w) + eat!(a, dinner, w) + end + if a.energy <= 0 + kill_agent!(a,w) + return + end + if rand() <= a.reprprob + reproduce!(a,w) + end + return a +end + +function world_step!(world::World) + # make sure that we only iterate over IDs that already exist in the + # current timestep this lets us safely add agents + ids = copy(keys(world.agents)) + + for id in ids + # agents can be killed by other agents, so make sure that we are + # not stepping dead agents forward + !haskey(world.agents,id) && continue + + a = world.agents[id] + agent_step!(a,world) + end +end + + +########## Counting agents #################################################### + +agent_count(p::Plant) = p.size / p.max_size +agent_count(::Animal) = 1 +agent_count(as::Vector{<:Agent}) = sum(agent_count,as) + +function agent_count(w::World) + function op(d::Dict,a::Agent{S}) where S<:Species + n = nameof(S) + d[n] = haskey(d,n) ? 
d[n]+agent_count(a) : agent_count(a)
+        return d
+    end
+    reduce(op, w.agents |> values, init=Dict{Symbol,Float64}())
+end
diff --git a/docs_vitepress/src/lectures/lecture_04/grass-sheep-wolf.jl b/docs_vitepress/src/lectures/lecture_04/grass-sheep-wolf.jl
new file mode 100644
index 00000000..0dab503e
--- /dev/null
+++ b/docs_vitepress/src/lectures/lecture_04/grass-sheep-wolf.jl
@@ -0,0 +1,39 @@
+using Plots
+include("Lab04Ecosystem.jl")
+
+function make_counter()
+    n = 0
+    counter() = n += 1
+end
+
+function create_world()
+    n_grass = 1_000
+    n_sheep = 40
+    n_wolves = 4
+
+    nextid = make_counter()
+
+    World(vcat(
+        [Grass(nextid()) for _ in 1:n_grass],
+        [Sheep(nextid()) for _ in 1:n_sheep],
+        [Wolf(nextid()) for _ in 1:n_wolves],
+    ))
+end
+world = create_world();
+
+counts = Dict(n=>[c] for (n,c) in agent_count(world))
+for _ in 1:100
+    world_step!(world)
+    for (n,c) in agent_count(world)
+        push!(counts[n],c)
+    end
+end
+
+plt = plot()
+tolabel(::Type{Animal{Sheep}}) = "Sheep"
+tolabel(::Type{Animal{Wolf}}) = "Wolf"
+tolabel(::Type{Plant{Grass}}) = "Grass"
+for (A,c) in counts
+    plot!(plt, c, label=tolabel(A), lw=2)
+end
+display(plt)
diff --git a/docs_vitepress/src/lectures/lecture_04/hw.md b/docs_vitepress/src/lectures/lecture_04/hw.md
new file mode 100644
index 00000000..258d8b15
--- /dev/null
+++ b/docs_vitepress/src/lectures/lecture_04/hw.md
@@ -0,0 +1,54 @@
+# [Homework 4](@id hw4)
+
+In this homework you will have to write two additional `@testset`s for the
+Ecosystem. One testset should be contained in a file `test/sheep.jl` and verify
+that the function `eat!(::Animal{Sheep}, ::Plant{Grass}, ::World)` works correctly. Another
+testset should be in the file `test/wolf.jl` and verify that the function
+`eat!(::Animal{Wolf}, ::Animal{Sheep}, ::World)` works correctly.
+
+## How to submit?
+
+Zip the whole package folder `Ecosystem.jl` and upload it to BRUTE.
+The package has to include at least the following files:
+
+```
+├── src
+│   └── Ecosystem.jl
+└── test
+    ├── sheep.jl      # contains only a single @testset
+    ├── wolf.jl       # contains only a single @testset
+    └── runtests.jl
+```
+
+The `test/runtests.jl` file can look like this:
+
+```julia
+using Test
+using Ecosystem
+
+include("sheep.jl")
+include("wolf.jl")
+# ...
+```
+
+## Test `Sheep`
+
+::: danger Homework
+
+1. Create a `Sheep` with food probability $p_f=1$
+2. Create *fully grown* `Grass` and a `World` with the two agents.
+3. Execute `eat!(::Animal{Sheep}, ::Plant{Grass}, ::World)`
+4. `@test` that the size of the `Grass` now has `size == 0`
+
+:::
+
+## Test `Wolf`
+
+::: danger Homework
+
+1. Create a `Wolf` with food probability $p_f=1$
+2. Create a `Sheep` and a `World` with the two agents.
+3. Execute `eat!(::Animal{Wolf}, ::Animal{Sheep}, ::World)`
+4. `@test` that the World only has one agent left in the agents dictionary
+
+:::
\ No newline at end of file
diff --git a/docs_vitepress/src/lectures/lecture_04/lab.md b/docs_vitepress/src/lectures/lecture_04/lab.md
new file mode 100644
index 00000000..72406304
--- /dev/null
+++ b/docs_vitepress/src/lectures/lecture_04/lab.md
@@ -0,0 +1,389 @@
+# Lab 04: Packaging
+
+```@setup block
+using StatsBase
+include("../lecture_03/Lab03Ecosystem.jl")
+
+function find_food(a::Animal, w::World)
+    as = filter(x -> eats(a,x), w.agents |> values |> collect)
+    isempty(as) ? nothing : sample(as)
+end
+
+eats(::Animal{Sheep},g::Plant{Grass}) = g.size > 0
+eats(::Animal{Wolf},::Animal{Sheep}) = true
+eats(::Agent,::Agent) = false
+```
+
+## Warmup - Stepping through time
+
+We now have all necessary functions in place to make agents perform one step
+of our simulation. At the beginning of each step an animal loses energy.
+Afterwards it tries to find some food, which it will subsequently eat. If the
+animal then has less than zero energy it dies and is removed from the world.
If +it has positive energy it will try to reproduce. + +Plants have a simpler life. They simply grow if they have not reached their maximal size. + +::: warning Exercise + +1. Implement a method `agent_step!(::Animal,::World)` which performs the following steps: + - Decrement $E$ of agent by `1.0`. + - With $p_f$, try to find some food and eat it. + - If $E<0$, the animal dies. + - With $p_r$, try to reproduce. +2. Implement a method `agent_step!(::Plant,::World)` which performs the following steps: + - If the size of the plant is smaller than `max_size`, increment the plant's size by one. + +::: + +::: details Show solution + +```@example block +function agent_step!(p::Plant, w::World) + if p.size < p.max_size + p.size += 1 + end +end + +function agent_step!(a::Animal, w::World) + a.energy -= 1 + if rand() <= a.foodprob + dinner = find_food(a,w) + eat!(a, dinner, w) + end + if a.energy < 0 + kill_agent!(a,w) + return + end + if rand() <= a.reprprob + reproduce!(a,w) + end +end + +nothing # hide +``` + +::: + +An `agent_step!` of a sheep in a world with a single grass should make it consume the grass, +let it reproduce, and eventually die if there is no more food and its energy is at zero: + +```@repl block +sheep = Sheep(1,2.0,2.0,1.0,1.0,male); +grass = Grass(2,2,2); +world = World([sheep, grass]) +agent_step!(sheep, world); world +# NOTE: The second agent step leads to an error. +# Can you figure out what is the problem here? +agent_step!(sheep, world); world +``` + +::: warning Exercise + +Finally, lets implement a function `world_step!` which performs one +`agent_step!` for each agent. Note that simply iterating over all agents could +lead to problems because we are mutating the agent dictionary. One solution for +this is to iterate over a copy of all agent IDs that are present when starting +to iterate over agents. Additionally, it could happen that an agent is killed +by another one before we apply `agent_step!` to it. 
To solve this you can check +if a given ID is currently present in the `World`. + +::: + +::: details Show solution + +```@example block +# make it possible to eat nothing +eat!(::Animal, ::Nothing, ::World) = nothing + +function world_step!(world::World) + # make sure that we only iterate over IDs that already exist in the + # current timestep this lets us safely add agents + ids = copy(keys(world.agents)) + + for id in ids + # agents can be killed by other agents, so make sure that we are + # not stepping dead agents forward + !haskey(world.agents,id) && continue + + a = world.agents[id] + agent_step!(a,world) + end +end + +nothing #hide +``` + +::: + +```@repl block +w = World([Sheep(1), Sheep(2), Wolf(3)]) +world_step!(w); w +world_step!(w); w +world_step!(w); w +``` + +Finally, lets run a few simulation steps and plot the solution + +```@example block +n_grass = 1_000 +n_sheep = 40 +n_wolves = 4 + +gs = [Grass(id) for id in 1:n_grass] +ss = [Sheep(id) for id in (n_grass+1):(n_grass+n_sheep)] +ws = [Wolf(id) for id in (n_grass+n_sheep+1):(n_grass+n_sheep+n_wolves)] +w = World(vcat(gs,ss,ws)) + +counts = Dict(n=>[c] for (n,c) in agent_count(w)) +for _ in 1:100 + world_step!(w) + for (n,c) in agent_count(w) + push!(counts[n],c) + end +end + +using Plots + +plt = plot() +for (n,c) in counts + plot!(plt, c, label=string(n), lw=2) +end +plt +``` + +## Package: `Ecosystem.jl` + +In the main section of this lab you will create your own `Ecosystem.jl` package +to organize *and test (!)* the code that we have written so far. + +### `PkgTemplates.jl` + +::: warning Exercise + +The simplest way to create a new package in Julia is to use `PkgTemplates.jl`. 
+`]add PkgTemplates` to your global julia env and create a new package by running:
+
+```julia
+using PkgTemplates
+Template(interactive=true)("Ecosystem")
+```
+
+to interactively specify various options for your new package or use the following
+snippet to generate it programmatically:
+
+```julia
+using PkgTemplates
+
+# define the package template
+template = Template(;
+    user = "GithubUserName",            # github user name
+    authors = ["Author1", "Author2"],   # list of authors
+    dir = "/path/to/folder/",           # dir in which the package will be created
+    julia = v"1.8",                     # compat version of Julia
+    plugins = [
+        !CompatHelper,                  # disable CompatHelper
+        !TagBot,                        # disable TagBot
+        Readme(; inline_badges = true), # added readme file with badges
+        Tests(; project = true),        # added Project.toml file for unit tests
+        Git(; manifest = false),        # add manifest.toml to .gitignore
+        License(; name = "MIT")         # added MIT license
+    ],
+)
+
+# execute the package template (this creates all files/folders)
+template("Ecosystem")
+```
+
+:::
+
+::: details Show solution
+
+This should have created a new folder `Ecosystem` which looks like below.
+
+```
+.
+├── LICENSE +├── Project.toml +├── README.md +├── src +│ └── Ecosystem.jl +└── test + ├── Manifest.toml + ├── Project.toml + └── runtests.jl +``` + +If you `]activate /path/to/Ecosystem` you should be able to run `]test` to run the autogenerated test (which is not doing anything) +and get the following output: + +```julia +(Ecosystem) pkg> test + Testing Ecosystem + Status `/private/var/folders/6h/l9_skfms2v3dt8z3zfnd2jr00000gn/T/jl_zd5Uai/Project.toml` + [e77cd98c] Ecosystem v0.1.0 `~/repos/Ecosystem` + [8dfed614] Test `@stdlib/Test` + Status `/private/var/folders/6h/l9_skfms2v3dt8z3zfnd2jr00000gn/T/jl_zd5Uai/Manifest.toml` + [e77cd98c] Ecosystem v0.1.0 `~/repos/Ecosystem` + [2a0f44e3] Base64 `@stdlib/Base64` + [b77e0a4c] InteractiveUtils `@stdlib/InteractiveUtils` + [56ddb016] Logging `@stdlib/Logging` + [d6f4376e] Markdown `@stdlib/Markdown` + [9a3f8284] Random `@stdlib/Random` + [ea8e919c] SHA v0.7.0 `@stdlib/SHA` + [9e88b42a] Serialization `@stdlib/Serialization` + [8dfed614] Test `@stdlib/Test` + Testing Running tests... +Test Summary: |Time +Ecosystem.jl | None 0.0s + Testing Ecosystem tests passed +``` + +::: + + +::: danger + +From now on make sure that you **always** have the `Ecosystem` enviroment +enabled. Otherwise you will not end up with the correct dependencies in your +packages + +::: + +### Adding content to `Ecosystem.jl` + +::: warning Exercise + +Next, let's add the types and functions we have defined so +far. You can use `include("path/to/file.jl")` in the main module file at +`src/Ecosystem.jl` to bring some structure in your code. An exemplary +file structure could look like below. + +``` +. +├── LICENSE +├── Manifest.toml +├── Project.toml +├── README.md +├── src +│ ├── Ecosystem.jl +│ ├── animal.jl +│ ├── plant.jl +│ └── world.jl +└── test + └── runtests.jl +``` + +While you are adding functionality to your package you can make great use of +`Revise.jl`. 
Loading `Revise.jl` before your `Ecosystem.jl` will automatically +recompile (and invalidate old methods!) while you develop. You can install it +in your global environment and and create a `$HOME/.config/startup.jl` which always loads +`Revise`. It can look like this: + +```julia +# try/catch block to make sure you can start julia if Revise should not be installed +try + using Revise +catch e + @warn(e.msg) +end +``` + +::: + +::: danger + +At some point along the way you should run into problems with the `sample` +functions or when trying `using StatsBase`. This is normal, because you have +not added the package to the `Ecosystem` environment yet. Adding it is as easy +as `]add StatsBase`. Your `Ecosystem` environment should now look like this: + +```julia +(Ecosystem) pkg> status +Project Ecosystem v0.1.0 +Status `~/repos/Ecosystem/Project.toml` + [2913bbd2] StatsBase v0.33.21 +``` + +::: + +::: warning Exercise + +In order to use your new types/functions like below + +```julia +using Ecosystem + +Sheep(2) +``` + +you have to `export` them from your module. Add exports for all important types +and functions. + +::: + +::: details Show solution + +```julia +# src/Ecosystem.jl +module Ecosystem + +using StatsBase + +export World +export Species, PlantSpecies, AnimalSpecies, Grass, Sheep, Wolf +export Agent, Plant, Animal +export agent_step!, eat!, eats, find_food, reproduce!, world_step!, agent_count + +# .... + +end +``` + +::: + +### Unit tests + +Every package should have tests which verify the correctness of your +implementation, such that you can make changes to your codebase and remain +confident that you did not break anything. + +Julia's `Test` package provides you functionality to easily write [unit +tests](https://docs.julialang.org/en/v1/stdlib/Test/). 
+ +::: warning Exercise + +In the file `test/runtests.jl`, create a new `@testset` and write three `@test`s +which check that the `show` methods we defined for `Grass`, `Sheep`, and `Wolf` work as expected. + +The function `repr(x) == "some string"` to check if the string representation we +defined in the `Base.show` overload returns what you expect. + +::: + +::: details Show solution + +```@repl block +# using Ecosystem +using Test + +@testset "Base.show" begin + g = Grass(1,1,1) + s = Animal{Sheep}(2,1,1,1,1,male) + w = Animal{Wolf}(3,1,1,1,1,female) + @test repr(g) == "🌿 #1 100% grown" + @test repr(s) == "🐑♂ #2 E=1.0 ΔE=1.0 pr=1.0 pf=1.0" + @test repr(w) == "🐺♀ #3 E=1.0 ΔE=1.0 pr=1.0 pf=1.0" +end +``` + +::: + +### Github CI +::: warning Exercise + +If you want you can upload you package to Github and add the `julia-runtest` +[Github Action](https://github.com/julia-actions/julia-runtest) to automatically +test your code for every new push you make to the repository. + +::: \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_04/lecture.md b/docs_vitepress/src/lectures/lecture_04/lecture.md new file mode 100644 index 00000000..f9f023c7 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_04/lecture.md @@ -0,0 +1,570 @@ +# [Package development](@id pkg_lecture) + +Organization of the code is more important with the increasing size of the project and the number of contributors and users. Moreover, it will become essential when different codebases are expected to be combined and reused. + +- Julia was designed from the beginning to encourage code reuse across different codebases as possible +- Julia ecosystem lives on a *namespace*. From then, it builds projects and environments. + +## Namespaces and modules + +Namespace logically separate +fragments of source code so that they can be developed independently without affecting +each other. 
If I define a function in one namespace, I will still be able to define another +function in a different namespace even though both functions have the same name. + +- prevents confusion when common words are used in different meaning: + - Too general name of functions "create", "extend", "loss", + - or data "X", "y" (especially in mathematics, think of π) + - may not be an issue if used with different types +- *Modules* is Julia syntax for a namespace + +Example: + +```julia +module MySpace + +function test1() + println("test1") +end + +function test2() + println("test2") +end + +export test1 + +#include("filename.jl") + +end +``` + +Function `include` copies content of the file to this location (will be part of the module). + +Creates functions: + +```julia +MySpace.test1 +MySpace.test2 +``` + +For easier manipulation, these functions can be "exported" to be exposed to the outer world (another namespace). + +Keyword: `using` exposes the exported functions and structs: + +```julia +using .MySpace +``` + +The dot means that the module was defined in this scope. + +Keyword: `import` imports function with availability to redefine it. + +Combinations: + +| usecase | results | +| :-- | :-- | +| using MySpace | MySpace.test1 | +| | MySpace.test2 | +| | test1 | +| using MySpace: test1 | test1 | +| import MySpace | MySpace.test1* | +| | MySpace.test2* | +| import MySpace: test1 | test1* | +| import MySpace: test2 | test2* | + + - symbol "*" denotes functions that can be redefined + + ```julia + using .MySpace: test1 + + test1()=println("new test") + + import .MySpace: test1 + + test1()=println("new test") +``` + +### Conflicts: + +When importing/using functions with name that is already imported/used from another module: + +- the imported functions/structs are invalidated. +- both functions has to be accessed by their full names. 
+ +Resolution: + +- It may be easier to cherry pick only the functions we need (rather than importing all via `using`) +- rename some function using keyword `as` + +```julia +import MySpace2: test1 as t1 +``` + +### Submodules + +Modules can be used or included within other modules: + +```julia +module A + a=1; +end + +module B + module C + c = 2 + end + b = C.c # you can read from C (by reference) + using ..A: a + # a= b # but not write to A +end; +``` + +REPL of Julia is a module called "Main". + +- modules are not copied, but referenced, i.e. `B.b===B.C.c` +- including one module twice (from different packages) is not a problem +- Julia 1.9 has the ability to change the contextual module in the REPL: + ```REPL.activate(TestPackage)``` + +### Revise.jl + +The fact that Julia can redefine a function in a Module by importing it is used by package `Revise.jl` to synchronize REPL with a module or file. + +So far, we have worked in REPL. If you have a file that is loaded and you want to modify it, you would need to either: + +1. reload the whole file, or +2. copy the changes to REPL + +`Revise.jl` does the latter automatically. + +Example demo: + +```julia +using Revise.jl + +includet("example.jl") +``` + +Works with: + +- any package loaded with `import` or `using`, +- script loaded with `includet`, +- Base julia itself (with Revise.track(Base)) +- standard libraries (with, e.g., using Unicode; Revise.track(Unicode)) + +Does not work with variables! + +**How it works**: monitors source code for changes and then does: + +```julia +for def in setdiff(oldexprs, newexprs) + # `def` is an expression that defines a method. + # It was in `oldexprs`, but is no longer present in `newexprs`--delete the method. + delete_methods_corresponding_to_defexpr(mod, def) +end + +for def in setdiff(newexprs, oldexprs) + # `def` is an expression for a new or modified method. Instantiate it. 
+ Core.eval(mod, def) +end +``` + +See [Modern Julia Workflows](https://modernjuliaworkflows.github.io) for more hints + +## Namespaces & scoping + +Every module introduces a new global scope. + +- Global scope + - No variable or function is expected to exist outside of it + - Every module is equal to a global scope (no single "global" exists) + - The REPL has a global module called `Main` +- Local scope + + Variables in Julia do not need to be explicitly declared, they are created by assignments: `x=1`. + In local scope, the compiler checks if variable `x` does not exist outside. We have seen: + + ```julia + x=1 + + f(y)=x+y + ``` + + The rules for local scope determine how to treat **assignment** of `x`. If local `x` exists, it is used, if it does not: + - in *hard* scope: new local `x` is created + - in *soft* scope: checks if `x` exists outside (global) + - if not: new local `x` is created + - if yes: the split is REPL/non-interactive: + - REPL: global `x` is used (convenience, as of 1.6) + - non-interactive: local `x` is created + +- keyword `local` and `global` can be used to specify which variable to use + +From documentation: + +| Construct | Scope type | Allowed within | +|:-- |:-- |:--- | +| `struct` | local (soft) | global | +| `for`, `while`, `try` | local (soft) | global, local | +| `macro` | local (hard) | global | +| functions, `do` blocks, `let` blocks, comprehensions, generators | local (hard) | global, local | + +Question: + +```julia +x=1 +f()= x=3 +f() + +@show x; +``` + +```julia +x = 1 + +for _ = 1:1 + x=3 +end + +@show x; +``` + +Notice that if does not introduce new scope + +```julia +if true + branch_taken = true +else + branch_not_taken = true +end +``` + +::: tip do-block + +Let's assume a function, which takes as a first argument a function + +```julia + g(f::Function, args...) 
= println("f called on $(args) evaluates to ", f(args...)) +``` + +We can use `g` as `g(+, 1, 2)`, or with a lambda function `g(x -> x^2, 2).` But sometimes, it might be useful to the lambda function to span multiple lines. This can be achieved by a `do` block as + +```julia +g(1,2,3) do a,b,c + a*b + c +end +``` + +::: + +## Packages + +Package is a source tree with a standard layout. It provides a module and thus can be loaded with `include` or `using`. + +Minimimal package: + +``` +PackageName/ +├── src/ +│ └── PackageName.jl +├── Project.toml +``` + +Contains: + +- `Project.toml` file describing basic properties: + - `Name`, does not have to be unique (federated package sources) + - `UUID`, has to be unique (generated automatically) + - optionally [deps], [targets],... + +- file `src/PackageName.jl` that defines module `PackageName` which is executed when loaded. + + +Many other optional directories: + +- directory tests/, (almost mandatory) +- directory docs/ (common) +- directory scripts/, examples/,... (optional) + + + +::: tip Type-Piracy + +"Type piracy" refers to the practice of extending or redefining methods in Base or other packages on types that you have not defined. In extreme cases, you can crash Julia (e.g. if your method extension or redefinition causes invalid input to be passed to a ccall). Type piracy can complicate reasoning about code, and may introduce incompatibilities that are hard to predict and diagnose. + +```julia +module A + +import Base.* + +*(x::Symbol, y::Symbol) = Symbol(x,y) + +end +``` + +::: + +The package typically loads other modules that form package dependencies. + +## Project environments + +Is a package that does not contain `Name` and `UUID` in `Project.toml`. It's used when you don't need to create a package for your work. It's created by `activate some/path` in REPL package mode. + +## Project Manifest + +Both package and environment can contain an additional file `Manifest.toml`. 
+This file tracks full dependency tree of a project including versions of the packages on which it depends. + +for example: + +```toml +# This file is machine-generated - editing it directly is not advised + +[[AbstractFFTs]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0" +uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" +version = "1.0.1" + +[[AbstractTrees]] +git-tree-sha1 = "03e0550477d86222521d254b741d470ba17ea0b5" +uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +version = "0.3.4" +``` + +Content of files `Project.toml` and `Manifest.toml` are maintained by PackageManager. + +## Package manager + +Handles both packages and projects: +- creating a project `]generate PkgName` +- adding an existing project `add PkgName` or ` add https://github.com/JuliaLang/Example.jl` + + Names are resolved by Registrators (public or private). + +- removing `]rm PkgName` +- updating `]update` +- developing `]dev http://...` + - `add` treats packages as being finished, version handling pkg manager. Precompiles! + - `dev` leaves all operations on the package to the user (git versioning, etc.). Always read content of files + +By default these operations are related to environment `.julia/environments/v1.9` + +E.g. running and updating will update packages in `Manifest.toml` in this directory. What if the update breaks functionality of some project package that uses special features? + +There can and *should* be more than one environment! + +Project environments are based on files with installed packages. + +- switching by `]activate Path` - creates `Project.toml` if not existing + +- from that moment, all package modifications will be relevant only to this project! 
+- when switching to a new project `]instantiate` will prepare (download and precompile) the environment + - creates `Manifest.toml` = list of all exact versions of all packages +- which packages are visible is determined by `LOAD_PATH` + - typically contains default libraries and default environment + - it is different for REPL and Pkg.tests! No default env. in tests. + + +## Package hygiene - workflow + +::: danger Potential danger + +Package dependencies may not be compatible: +- package `A` requires `C@<0.2` +- package `B` requires `C@>0.3` +- what should happen when `]add A` and `add B`? + +::: + +- keep your "@v#.#" as clean as possible (recommended are only debugging/profiling packages) +- use packages as much as you can, even for short work with scripts `]activate .` + - adding a package existing elsewhere is cheap (global cache) +- if you do not wish to store any files and just want to test random tricks of a cool package: `]activate --temp` + +### Package development with Revise + +Developing a package with interactive test/development: + +1. Create a package/module at one directory `MainPackage` +2. Create a script at another directory `MainScript`, and activate it `]activate .` +3. `dev MainPackage` in the `MainScript` environment + - Revise.jl will watch the `MainPackage` so it is always up to date + - in `dev` mode you have full control over commits etc. + +### Package Extensions + +Some functionality of a package that depends on external packages may not always be needed. A typical example is plotting and visualization that may require heavy visualization packages. +These are completely unnecessary e.g. in distributed server number crunching. + +The extension is a new module depending on: i) the base package, and ii) the conditioning package. +It will not be compiled if the conditioning package is not loaded. Once the optional package is loaded, the extension will be automatically compiled and loaded. 
+ +New feature since Julia 1.9: +- new directory in project tree: `ext/` the extending module is stored here +- new section in `Project.toml` called `[extensions]` listing extension names and their conditioning packages + +## Unit testing, /test + +Without explicit keywords for checking constructs (think missing functions in interfaces), the good quality of the code is guaranteed by detailed unit testing. + +- each package should have directory `/test` +- file `/test/runtests.jl` is run by the command `]test` of the package manager + + this file typically contains `include` of other tests + +- no formal structure of tests is prescribed + - test files are just ordinary Julia scripts + - user is free to choose what to test and how (freedom x formal rules) + +- testing functionality is supported by macros `@test` and `@testset` + + ```julia + @testset "trigonometric identities" begin + θ = 2/3*π + @test sin(-θ) ≈ -sin(θ) + @test cos(-θ) ≈ cos(θ) + @test sin(2θ) ≈ 2*sin(θ)*cos(θ) + @test cos(2θ) ≈ cos(θ)^2 - sin(θ)^2 + end; + ``` + +Testset is a collection of tests that will be run and summarized in a common report. + - Testsets can be nested: testsets in testsets + - tests can be in loops or functions + + ```julia + for i=1:10 + @test a[i]>0 + end + ``` + + - Useful operator `≈` checks for equality with given tolerance + + ```julia + a=5+1e-8 + @test a≈5 + @test a≈5 atol=1e-10 + ``` + +- @testset resets RNG to Random.GLOBAL_SEED before and after the test for repeatability + + The same results of RNG are not guaranteed between Julia versions! + +- Test coverage: package `Coverage.jl` +- Can be run automatically by continuous integration, e.g. GitHub actions +- integration in VSCode test via package `TestItems.jl` + + +## Documentation & Style, /docs + +A well written package is reusable if it is well documented. 
+ +The simplest kind of documentation is the docstring: +```julia +"Auxiliary function for printing a hello" +hello()=println("hello") + +""" +More complex function that adds π to input: +- x is the input argument (itemize) + +Can be written in latex: ``x \leftarrow x + \pi`` +""" +addπ(x) = x+π +``` + +Yields: + +::: tip Renders as + +More complex function that adds π to input: +- x is the input argument (itemize) + +Can be written in latex: ``x \leftarrow x + \pi`` + +::: + +Structure of the document + +``` +PackageName/ +├── src/ +│ └── SourceFile.jl +├── docs/ +│ ├── build/ +│ ├── src/ +│ └── make.jl +... +``` + +Where the line-by-line documentation is in the source files. + +- `/docs/src` folder can contain more detailed information: introductory pages, howtos, tutorials, examples +- running `make.jl` controls which pages are generated in what form (html or latex) documentation in the /build directory +- automated with GitHub actions + +Documentation is generated by the Julia code. + +- code in documentation can be evaluated + +```@repl test +x=3 +@show x +``` + +- documentation can be added by code: + + ```julia + struct MyType + value::String + end + + Docs.getdoc(t::MyType) = "Documentation for MyType with value $(t.value)" + + x = MyType("x") + y = MyType("y") + ``` + + See `?x` and `?y`. + + It uses the same very standard building blocks: multiple dispatch. + +## Precompilation + +By default, every package is precompiled when loading and stored in compiled form in a cache. + +If it defines methods that extend previously defined ones (e.g. from Base), it may affect already loaded packages which need to be recompiled as well. May take time. + +Julia has a tracking mechanism that stores information about the whole graph of dependencies. + +Faster code can be achieved by the `precompile` directive: + +```julia +module FSum + +fsum(x) = x +fsum(x,p...) = x+fsum(p...) 
+ +precompile(fsum,(Float64,Float64,Float64)) +end +``` + +Can be investigated using `MethodAnalysis`. + +```julia +using MethodAnalysis +mi =methodinstances(fsum) +``` + +Useful packages: +- `PackageCompiler.jl` has three main purposes: + + - Creating custom sysimages for reduced latency when working locally with packages that has a high startup time. + - Creating "apps" which are a bundle of files including an executable that can be sent and run on other machines without Julia being installed on that machine. + - Creating a relocatable C library bundle form of Julia code. + +- `AutoSysimages.jl` allows easy generation of precompiles images - reduces package loading + +## Additional material +- [Modern Julia Workflows](https://modernjuliaworkflows.github.io) diff --git a/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_DictUnion/Ecosystem.jl b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_DictUnion/Ecosystem.jl new file mode 100644 index 00000000..be8553d4 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_DictUnion/Ecosystem.jl @@ -0,0 +1,178 @@ +using StatsBase + +abstract type Species end + +abstract type PlantSpecies <: Species end +abstract type Grass <: PlantSpecies end + +abstract type AnimalSpecies <: Species end +abstract type Sheep <: AnimalSpecies end +abstract type Wolf <: AnimalSpecies end + +abstract type Agent{S<:Species} end + +abstract type Sex end +abstract type Female <: Sex end +abstract type Male <: Sex end + +########## World ############################################################# + +mutable struct World{A<:Agent} + agents::Dict{Int,A} + max_id::Int +end + +function World(agents::Vector{<:Agent}) + ids = [a.id for a in agents] + length(unique(ids)) == length(agents) || error("Not all agents have unique IDs!") + + types = unique(typeof.(agents)) + dict = Dict{Int,Union{types...}}(a.id => a for a in agents) + World(dict, maximum(ids)) +end + +# optional: overload 
Base.show +function Base.show(io::IO, w::World) + println(io, typeof(w)) + for (_,a) in w.agents + println(io," $a") + end +end + + +########## Animals ########################################################### + +mutable struct Animal{A<:AnimalSpecies,S<:Sex} <: Agent{A} + const id::Int + energy::Float64 + const Δenergy::Float64 + const reprprob::Float64 + const foodprob::Float64 +end + +function (A::Type{<:AnimalSpecies})(id::Int,E::T,ΔE::T,pr::T,pf::T,S::Type{<:Sex}) where T + Animal{A,S}(id,E,ΔE,pr,pf) +end + +# get the per species defaults back +randsex() = rand(subtypes(Sex)) +Sheep(id; E=4.0, ΔE=0.2, pr=0.8, pf=0.6, S=randsex()) = Sheep(id, E, ΔE, pr, pf, S) +Wolf(id; E=10.0, ΔE=8.0, pr=0.1, pf=0.2, S=randsex()) = Wolf(id, E, ΔE, pr, pf, S) + + +function Base.show(io::IO, a::Animal{A,S}) where {A,S} + e = a.energy + d = a.Δenergy + pr = a.reprprob + pf = a.foodprob + print(io, "$A$S #$(a.id) E=$e ΔE=$d pr=$pr pf=$pf") +end + +# note that for new species we will only have to overload `show` on the +# abstract species/sex types like below! +Base.show(io::IO, ::Type{Sheep}) = print(io,"🐑") +Base.show(io::IO, ::Type{Wolf}) = print(io,"🐺") +Base.show(io::IO, ::Type{Male}) = print(io,"♂") +Base.show(io::IO, ::Type{Female}) = print(io,"♀") + + +########## Plants ############################################################# + +mutable struct Plant{P<:PlantSpecies} <: Agent{P} + const id::Int + size::Int + const max_size::Int +end + +# constructor for all Plant{<:PlantSpecies} callable as PlantSpecies(...) 
+(A::Type{<:PlantSpecies})(id, s, m) = Plant{A}(id,s,m) +(A::Type{<:PlantSpecies})(id, m) = (A::Type{<:PlantSpecies})(id,rand(1:m),m) + +# default specific for Grass +Grass(id; max_size=10) = Grass(id, rand(1:max_size), max_size) + +function Base.show(io::IO, p::Plant{P}) where P + x = p.size/p.max_size * 100 + print(io,"$P #$(p.id) $(round(Int,x))% grown") +end + +Base.show(io::IO, ::Type{Grass}) = print(io,"🌿") + + +########## Eating / Dying / Reproducing ######################################## + +function eat!(wolf::Animal{Wolf}, sheep::Animal{Sheep}, w::World) + wolf.energy += sheep.energy * wolf.Δenergy + kill_agent!(sheep,w) +end +function eat!(sheep::Animal{Sheep}, grass::Plant{Grass}, ::World) + sheep.energy += grass.size * sheep.Δenergy + grass.size = 0 +end +eat!(::Animal, ::Nothing, ::World) = nothing + +kill_agent!(a::Agent, w::World) = delete!(w.agents, a.id) + +function find_agent(::Type{A}, w::World) where A<:Agent + dict = filter(x -> isa(x,A), w.agents |> values |> collect) + as = dict |> values |> collect + isempty(as) ? 
nothing : sample(as) +end + +find_food(::Animal{Wolf}, w::World) = find_agent(Animal{Sheep}, w) +find_food(::Animal{Sheep}, w::World) = find_agent(Plant{Grass}, w) + +find_mate(::Animal{A,Female}, w::World) where A<:AnimalSpecies = find_agent(Animal{A,Male}, w) +find_mate(::Animal{A,Male}, w::World) where A<:AnimalSpecies = find_agent(Animal{A,Female}, w) + +function reproduce!(a::Animal{A}, w::World) where A + m = find_mate(a,w) + if !isnothing(m) + a.energy = a.energy / 2 + vals = [getproperty(a,n) for n in fieldnames(Animal) if n ∉ [:id, :sex]] + new_id = w.max_id + 1 + ŝ = A(new_id, vals..., randsex()) + w.agents[ŝ.id] = ŝ + w.max_id = new_id + return ŝ + end +end + + +########## Stepping through time ############################################# + +function agent_step!(p::Plant, ::World) + if p.size < p.max_size + p.size += 1 + end +end +function agent_step!(a::Animal, w::World) + a.energy -= 1 + if rand() <= a.foodprob + dinner = find_food(a,w) + eat!(a, dinner, w) + end + if a.energy <= 0 + kill_agent!(a,w) + return + end + if rand() <= a.reprprob + reproduce!(a,w) + end + return a +end + +function world_step!(world::World) + # make sure that we only iterate over IDs that already exist in the + # current timestep this lets us safely add agents + ids = copy(keys(world.agents)) + + for id in ids + # agents can be killed by other agents, so make sure that we are + # not stepping dead agents forward + !haskey(world.agents,id) && continue + + a = world.agents[id] + agent_step!(a,world) + end +end diff --git a/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_DictUnion/bench.jl b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_DictUnion/bench.jl new file mode 100644 index 00000000..64619b32 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_DictUnion/bench.jl @@ -0,0 +1,27 @@ +using BenchmarkTools +using Random +Random.seed!(0) + +include("Ecosystem.jl") + +sheep = Sheep(1,1,1,1,1,Female) +sheep2 
= Sheep(3001,1,1,1,1,Male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) + +# check that something is returned +@info "check returns" find_food(sheep, world) reproduce!(sheep, world) + +# check type stability +@code_warntype find_food(sheep, world) +@code_warntype reproduce!(sheep, world) + +# benchmark +sheep = Sheep(1,1,1,1,1,Female) +sheep2 = Sheep(3001,1,1,1,1,Male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) +@btime find_food($sheep, $world) + +sheep = Sheep(1,1,1,1,1,Female) +sheep2 = Sheep(3001,1,1,1,1,Male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) +@btime reproduce!($sheep, $world) diff --git a/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_NamedTupleDict/Ecosystem.jl b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_NamedTupleDict/Ecosystem.jl new file mode 100644 index 00000000..1b6d584b --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_NamedTupleDict/Ecosystem.jl @@ -0,0 +1,241 @@ +using StatsBase +using Random + +abstract type Species end + +abstract type PlantSpecies <: Species end +abstract type Grass <: PlantSpecies end + +abstract type AnimalSpecies <: Species end +abstract type Sheep <: AnimalSpecies end +abstract type Wolf <: AnimalSpecies end + +abstract type Agent{S<:Species} end + +abstract type Sex end +abstract type Female <: Sex end +abstract type Male <: Sex end + +########## World ############################################################# + +mutable struct World{T<:NamedTuple} + # this is a NamedTuple of Dict{Int,<:Agent} + # but I don't know how to express that as a parametric type + agents::T + max_id::Int +end + +function World(agents::Vector{<:Agent}) + types = unique(typeof.(agents)) + ags = map(types) do T + as = filter(x -> isa(x,T), agents) + Dict{Int,T}(a.id=>a for a in as) + end + nt = (; zip(tosym.(types), ags)...) 
+ + ids = [a.id for a in agents] + length(unique(ids)) == length(agents) || error("Not all agents have unique IDs!") + World(nt, maximum(ids)) +end + +# optional: overload Base.show +function Base.show(io::IO, w::World) + ts = join([valtype(a) for a in w.agents], ", ") + println(io, "World[$ts]") + for dict in w.agents + for (_,a) in dict + println(io," $a") + end + end +end + +########## Animals ########################################################### + +mutable struct Animal{A<:AnimalSpecies,S<:Sex} <: Agent{A} + const id::Int + energy::Float64 + const Δenergy::Float64 + const reprprob::Float64 + const foodprob::Float64 +end + +function (A::Type{<:AnimalSpecies})(id::Int,E::T,ΔE::T,pr::T,pf::T,S::Type{<:Sex}) where T + Animal{A,S}(id,E,ΔE,pr,pf) +end + +# get the per species defaults back +randsex() = rand(Bool) ? Female : Male +Sheep(id; E=4.0, ΔE=0.2, pr=0.8, pf=0.6, S=randsex()) = Sheep(id, E, ΔE, pr, pf, S) +Wolf(id; E=10.0, ΔE=8.0, pr=0.1, pf=0.2, S=randsex()) = Wolf(id, E, ΔE, pr, pf, S) + + +function Base.show(io::IO, a::Animal{A,S}) where {A,S} + e = a.energy + d = a.Δenergy + pr = a.reprprob + pf = a.foodprob + print(io, "$A$S #$(a.id) E=$e ΔE=$d pr=$pr pf=$pf") +end + +# note that for new species we will only have to overload `show` on the +# abstract species/sex types like below! +Base.show(io::IO, ::Type{Sheep}) = print(io,"🐑") +Base.show(io::IO, ::Type{Wolf}) = print(io,"🐺") +Base.show(io::IO, ::Type{Male}) = print(io,"♂") +Base.show(io::IO, ::Type{Female}) = print(io,"♀") + + +########## Plants ############################################################# + +mutable struct Plant{P<:PlantSpecies} <: Agent{P} + const id::Int + size::Int + const max_size::Int +end + +# constructor for all Plant{<:PlantSpecies} callable as PlantSpecies(...) 
+(A::Type{<:PlantSpecies})(id, s, m) = Plant{A}(id,s,m) +(A::Type{<:PlantSpecies})(id, m) = (A::Type{<:PlantSpecies})(id,rand(1:m),m) + +# default specific for Grass +Grass(id; max_size=10) = Grass(id, rand(1:max_size), max_size) + +function Base.show(io::IO, p::Plant{P}) where P + x = p.size/p.max_size * 100 + print(io,"$P #$(p.id) $(round(Int,x))% grown") +end + +Base.show(io::IO, ::Type{Grass}) = print(io,"🌿") + + +########## Eating / Dying / Reproducing ######################################## + +function eat!(wolf::Animal{Wolf}, sheep::Animal{Sheep}, w::World) + wolf.energy += sheep.energy * wolf.Δenergy + kill_agent!(sheep,w) +end +function eat!(sheep::Animal{Sheep}, grass::Plant{Grass}, ::World) + sheep.energy += grass.size * sheep.Δenergy + grass.size = 0 +end +eat!(::Animal, ::Nothing, ::World) = nothing + +kill_agent!(a::Agent, w::World) = delete!(getfield(w.agents, tosym(typeof(a))), a.id) + +function find_agent(::Type{A}, w::World) where A<:Agent + dict = get(w.agents, tosym(A), nothing) + if !isnothing(dict) + as = dict |> values |> collect + isempty(as) ? nothing : rand(as) + else + nothing + end +end + +find_agent(::Type{P}, w::World) where P<:PlantSpecies = find_agent(Plant{P}, w) + +function find_agent(::Type{A}, w::World) where A<:AnimalSpecies + df = get(w.agents, tosym(Animal{A,Female}), Dict{Int,Animal{A,Female}}()) + af = df |> values |> collect + + dm = get(w.agents, tosym(Animal{A,Male}), Dict{Int,Animal{A,Male}}()) + am = dm |> values |> collect + + nf = length(af) + nm = length(am) + if nf == 0 + # no females -> sample males + isempty(am) ? nothing : rand(am) + elseif nm == 0 + # no males -> sample females + isempty(af) ? nothing : rand(af) + else + # both -> sample uniformly from one or the other + rand() < nm/(nf+nm) ? 
rand(am) : rand(af) + end +end + +find_food(::Animal{Wolf}, w::World) = find_agent(Sheep, w) +find_food(::Animal{Sheep}, w::World) = find_agent(Grass, w) + +find_mate(::Animal{A,Female}, w::World) where A<:AnimalSpecies = find_agent(Animal{A,Male}, w) +find_mate(::Animal{A,Male}, w::World) where A<:AnimalSpecies = find_agent(Animal{A,Female}, w) + +function reproduce!(a::Animal{A,S}, w::World) where {A,S} + m = find_mate(a,w) + if !isnothing(m) + a.energy = a.energy / 2 + new_id = w.max_id + 1 + ŝ = Animal{A,S}(new_id, a.energy, a.Δenergy, a.reprprob, a.foodprob) + getfield(w.agents, tosym(ŝ))[ŝ.id] = ŝ + w.max_id = new_id + return ŝ + else + nothing + end +end + +########## Stepping through time ############################################# + +function agent_step!(p::Plant, ::World) + if p.size < p.max_size + p.size += 1 + end +end +function agent_step!(a::Animal, w::World) + a.energy -= 1 + if rand() <= a.foodprob + dinner = find_food(a,w) + eat!(a, dinner, w) + end + if a.energy <= 0 + kill_agent!(a,w) + return + end + if rand() <= a.reprprob + reproduce!(a,w) + end + return a +end + +function flat(w::World) + xs = map(zip(keys(w.agents), w.agents)) do (name, species) + map(species |> keys |> collect) do id + id, name + end + end + Iterators.flatten(xs) +end + +function world_step!(world::World) + for (id, field) in shuffle(flat(world) |> collect) + species = getfield(world.agents, field) + !haskey(species, id) && continue + a = species[id] + agent_step!(a, world) + end + + # this is faster but incorrect because species of same gender are treated + # one after another - which means that e.g. 
all Animal{Sheep,Female} will + # eat before all Animal{Sheep,Male} leaving less food for the latter + # map(world.agents) do species + # ids = copy(keys(species)) + # for id in ids + # !haskey(species,id) && continue + # a = species[id] + # agent_step!(a, world) + # end + # end +end + +# for accessing NamedTuple in World +tosym(::T) where T<:Animal = tosym(T) + +# NOTE: needed for type stability +# TODO: do this with meta programming +tosym(::Type{Animal{Wolf,Female}}) = Symbol("WolfFemale") +tosym(::Type{Animal{Wolf,Male}}) = Symbol("WolfMale") +tosym(::Type{Animal{Sheep,Female}}) = Symbol("SheepFemale") +tosym(::Type{Animal{Sheep,Male}}) = Symbol("SheepMale") +tosym(::Type{Plant{Grass}}) = Symbol("Grass") + diff --git a/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_NamedTupleDict/bench.jl b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_NamedTupleDict/bench.jl new file mode 100644 index 00000000..64619b32 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_NamedTupleDict/bench.jl @@ -0,0 +1,27 @@ +using BenchmarkTools +using Random +Random.seed!(0) + +include("Ecosystem.jl") + +sheep = Sheep(1,1,1,1,1,Female) +sheep2 = Sheep(3001,1,1,1,1,Male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) + +# check that something is returned +@info "check returns" find_food(sheep, world) reproduce!(sheep, world) + +# check type stability +@code_warntype find_food(sheep, world) +@code_warntype reproduce!(sheep, world) + +# benchmark +sheep = Sheep(1,1,1,1,1,Female) +sheep2 = Sheep(3001,1,1,1,1,Male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) +@btime find_food($sheep, $world) + +sheep = Sheep(1,1,1,1,1,Female) +sheep2 = Sheep(3001,1,1,1,1,Male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) +@btime reproduce!($sheep, $world) diff --git a/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_NamedTupleDict/profview.jl 
b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_NamedTupleDict/profview.jl new file mode 100644 index 00000000..2fb35fb3 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_NamedTupleDict/profview.jl @@ -0,0 +1,25 @@ +include("Ecosystem.jl") + +function make_counter() + n = 0 + counter() = n += 1 +end + +function create_world() + n_grass = 1_000 + n_sheep = 40 + n_wolves = 4 + + nextid = make_counter() + + World(vcat( + [Grass(nextid()) for _ in 1:n_grass], + [Sheep(nextid()) for _ in 1:n_sheep], + [Wolf(nextid()) for _ in 1:n_wolves], + )) +end +world = create_world(); + +world_step!(world) +world_step!(world) +@profview for i in 1:100 world_step!(world) end diff --git a/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_NamedTuple_worldstep.png b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_NamedTuple_worldstep.png new file mode 100644 index 00000000..a4b87b7c Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_ST_world_NamedTuple_worldstep.png differ diff --git a/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_DictUnion/Ecosystem.jl b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_DictUnion/Ecosystem.jl new file mode 100644 index 00000000..a0c33455 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_DictUnion/Ecosystem.jl @@ -0,0 +1,202 @@ +using StatsBase + +abstract type Species end + +abstract type PlantSpecies <: Species end +abstract type Grass <: PlantSpecies end + +abstract type AnimalSpecies <: Species end +abstract type Sheep <: AnimalSpecies end +abstract type Wolf <: AnimalSpecies end + +abstract type Agent{S<:Species} end + +# instead of Symbols we can use an Enum for the sex field +# using an Enum here makes things easier to extend in case you +# need more than just binary sexes and is also more explicit than +# just a boolean +@enum Sex female male + 
+########## World ############################################################# + +mutable struct World{A<:Agent} + agents::Dict{Int,A} + max_id::Int +end + +function World(agents::Vector{<:Agent}) + ids = [a.id for a in agents] + length(unique(ids)) == length(agents) || error("Not all agents have unique IDs!") + + types = unique(typeof.(agents)) + dict = Dict{Int,Union{types...}}(a.id => a for a in agents) + World(dict, maximum(ids)) +end + +# optional: overload Base.show +function Base.show(io::IO, w::World) + println(io, typeof(w)) + for (_,a) in w.agents + println(io," $a") + end +end + + +########## Animals ########################################################### + +mutable struct Animal{A<:AnimalSpecies} <: Agent{A} + const id::Int + energy::Float64 + const Δenergy::Float64 + const reprprob::Float64 + const foodprob::Float64 + const sex::Sex +end + +function (A::Type{<:AnimalSpecies})(id::Int,E::T,ΔE::T,pr::T,pf::T,s::Sex) where T + Animal{A}(id,E,ΔE,pr,pf,s) +end + +# get the per species defaults back +randsex() = rand(instances(Sex)) +Sheep(id; E=4.0, ΔE=0.2, pr=0.8, pf=0.6, s=randsex()) = Sheep(id, E, ΔE, pr, pf, s) +Wolf(id; E=10.0, ΔE=8.0, pr=0.1, pf=0.2, s=randsex()) = Wolf(id, E, ΔE, pr, pf, s) + + +function Base.show(io::IO, a::Animal{A}) where {A<:AnimalSpecies} + e = a.energy + d = a.Δenergy + pr = a.reprprob + pf = a.foodprob + s = a.sex == female ? "♀" : "♂" + print(io, "$A$s #$(a.id) E=$e ΔE=$d pr=$pr pf=$pf") +end + +# note that for new species we will only have to overload `show` on the +# abstract species/sex types like below! +Base.show(io::IO, ::Type{Sheep}) = print(io,"🐑") +Base.show(io::IO, ::Type{Wolf}) = print(io,"🐺") + + +########## Plants ############################################################# + +mutable struct Plant{P<:PlantSpecies} <: Agent{P} + const id::Int + size::Int + const max_size::Int +end + +# constructor for all Plant{<:PlantSpecies} callable as PlantSpecies(...) 
+(A::Type{<:PlantSpecies})(id, s, m) = Plant{A}(id,s,m) +(A::Type{<:PlantSpecies})(id, m) = (A::Type{<:PlantSpecies})(id,rand(1:m),m) + +# default specific for Grass +Grass(id; max_size=10) = Grass(id, rand(1:max_size), max_size) + +function Base.show(io::IO, p::Plant{P}) where P + x = p.size/p.max_size * 100 + print(io,"$P #$(p.id) $(round(Int,x))% grown") +end + +Base.show(io::IO, ::Type{Grass}) = print(io,"🌿") + + +########## Eating / Dying / Reproducing ######################################## + +function eat!(wolf::Animal{Wolf}, sheep::Animal{Sheep}, w::World) + wolf.energy += sheep.energy * wolf.Δenergy + kill_agent!(sheep,w) +end +function eat!(sheep::Animal{Sheep}, grass::Plant{Grass}, ::World) + sheep.energy += grass.size * sheep.Δenergy + grass.size = 0 +end +eat!(::Animal, ::Nothing, ::World) = nothing + +kill_agent!(a::Agent, w::World) = delete!(w.agents, a.id) + +function find_mate(a::Animal, w::World) + ms = filter(x->mates(x,a), w.agents |> values |> collect) + isempty(ms) ? nothing : sample(ms) +end +mates(a::Animal{A}, b::Animal{A}) where A<:AnimalSpecies = a.sex != b.sex +mates(::Agent, ::Agent) = false + +function reproduce!(a::Animal{A}, w::World) where A + m = find_mate(a,w) + if !isnothing(m) + a.energy = a.energy / 2 + vals = [getproperty(a,n) for n in fieldnames(Animal) if n ∉ [:id, :sex]] + new_id = w.max_id + 1 + ŝ = Animal{A}(new_id, vals..., randsex()) + w.agents[ŝ.id] = ŝ + w.max_id = new_id + end +end + +# finding food / who eats who +function find_food(a::Animal, w::World) + as = filter(x -> eats(a,x), w.agents |> values |> collect) + isempty(as) ? 
nothing : sample(as) +end +eats(::Animal{Sheep},g::Plant{Grass}) = g.size > 0 +eats(::Animal{Wolf},::Animal{Sheep}) = true +eats(::Agent,::Agent) = false + + +########## Stepping through time ############################################# + +function agent_step!(p::Plant, ::World) + if p.size < p.max_size + p.size += 1 + end +end +function agent_step!(a::Animal, w::World) + a.energy -= 1 + if rand() <= a.foodprob + dinner = find_food(a,w) + eat!(a, dinner, w) + end + if a.energy <= 0 + kill_agent!(a,w) + return + end + if rand() <= a.reprprob + reproduce!(a,w) + end + return a +end + +function world_step!(world::World) + # make sure that we only iterate over IDs that already exist in the + # current timestep this lets us safely add agents + ids = copy(keys(world.agents)) + + for id in ids + # agents can be killed by other agents, so make sure that we are + # not stepping dead agents forward + !haskey(world.agents,id) && continue + + a = world.agents[id] + agent_step!(a,world) + end +end + + +########## Counting agents #################################################### + +agent_count(p::Plant) = p.size / p.max_size +agent_count(::Animal) = 1 +agent_count(as::Vector{<:Agent}) = sum(agent_count,as) + +function agent_count(w::World) + function op(d::Dict,a::A) where A<:Agent + if A in keys(d) + d[A] += agent_count(a) + else + d[A] = agent_count(a) + end + return d + end + foldl(op, w.agents |> values |> collect, init=Dict()) +end diff --git a/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_DictUnion/bench.jl b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_DictUnion/bench.jl new file mode 100644 index 00000000..5c01fcd6 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_DictUnion/bench.jl @@ -0,0 +1,27 @@ +using BenchmarkTools +using Random +Random.seed!(0) + +include("Ecosystem.jl") + +sheep = Sheep(1,1,1,1,1,female) +sheep2 = Sheep(3001,1,1,1,1,male) +world = World(vcat([sheep,sheep2], [Grass(i) 
for i=2:3000])) + +# check that something is returned +@info "check returns" find_food(sheep, world) reproduce!(sheep, world) + +# check type stability +@code_warntype find_food(sheep, world) +@code_warntype reproduce!(sheep, world) + +# benchmark +sheep = Sheep(1,1,1,1,1,female) +sheep2 = Sheep(3001,1,1,1,1,male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) +@btime find_food($sheep, $world) + +sheep = Sheep(1,1,1,1,1,female) +sheep2 = Sheep(3001,1,1,1,1,male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) +@btime reproduce!($sheep, $world) diff --git a/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_NamedTupleDict/Ecosystem.jl b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_NamedTupleDict/Ecosystem.jl new file mode 100644 index 00000000..9de3db5d --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_NamedTupleDict/Ecosystem.jl @@ -0,0 +1,230 @@ +using StatsBase + +abstract type Species end + +abstract type PlantSpecies <: Species end +abstract type Grass <: PlantSpecies end + +abstract type AnimalSpecies <: Species end +abstract type Sheep <: AnimalSpecies end +abstract type Wolf <: AnimalSpecies end + +abstract type Agent{S<:Species} end + +# instead of Symbols we can use an Enum for the sex field +# using an Enum here makes things easier to extend in case you +# need more than just binary sexes and is also more explicit than +# just a boolean +@enum Sex female male + +########## World ############################################################# + +mutable struct World{T<:NamedTuple} + # this is a NamedTuple of Dict{Int,<:Agent} + # but I don't know how to express that as a parametric type + agents::T + max_id::Int +end + +function World(agents::Vector{<:Agent}) + types = unique(typeof.(agents)) + ags = map(types) do T + as = filter(x -> isa(x,T), agents) + Dict{Int,T}(a.id=>a for a in as) + end + nt = (; zip(tosym.(types), ags)...) 
+ + ids = [a.id for a in agents] + length(unique(ids)) == length(agents) || error("Not all agents have unique IDs!") + World(nt, maximum(ids)) +end + +# optional: overload Base.show +function Base.show(io::IO, w::World) + ts = join([valtype(a) for a in w.agents], ", ") + println(io, "World[$ts]") + for dict in w.agents + for (_,a) in dict + println(io," $a") + end + end +end + +########## Animals ########################################################### + +mutable struct Animal{A<:AnimalSpecies} <: Agent{A} + const id::Int + energy::Float64 + const Δenergy::Float64 + const reprprob::Float64 + const foodprob::Float64 + const sex::Sex +end + +function (A::Type{<:AnimalSpecies})(id::Int,E::T,ΔE::T,pr::T,pf::T,s::Sex) where T + Animal{A}(id,E,ΔE,pr,pf,s) +end + +# get the per species defaults back +randsex() = rand(instances(Sex)) +Sheep(id; E=4.0, ΔE=0.2, pr=0.8, pf=0.6, s=randsex()) = Sheep(id, E, ΔE, pr, pf, s) +Wolf(id; E=10.0, ΔE=8.0, pr=0.1, pf=0.2, s=randsex()) = Wolf(id, E, ΔE, pr, pf, s) + + +function Base.show(io::IO, a::Animal{A}) where {A<:AnimalSpecies} + e = a.energy + d = a.Δenergy + pr = a.reprprob + pf = a.foodprob + s = a.sex == female ? "♀" : "♂" + print(io, "$A$s #$(a.id) E=$e ΔE=$d pr=$pr pf=$pf") +end + +# note that for new species we will only have to overload `show` on the +# abstract species/sex types like below! +Base.show(io::IO, ::Type{Sheep}) = print(io,"🐑") +Base.show(io::IO, ::Type{Wolf}) = print(io,"🐺") + + +########## Plants ############################################################# + +mutable struct Plant{P<:PlantSpecies} <: Agent{P} + const id::Int + size::Int + const max_size::Int +end + +# constructor for all Plant{<:PlantSpecies} callable as PlantSpecies(...) 
+(A::Type{<:PlantSpecies})(id, s, m) = Plant{A}(id,s,m) +(A::Type{<:PlantSpecies})(id, m) = (A::Type{<:PlantSpecies})(id,rand(1:m),m) + +# default specific for Grass +Grass(id; max_size=10) = Grass(id, rand(1:max_size), max_size) + +function Base.show(io::IO, p::Plant{P}) where P + x = p.size/p.max_size * 100 + print(io,"$P #$(p.id) $(round(Int,x))% grown") +end + +Base.show(io::IO, ::Type{Grass}) = print(io,"🌿") + + +########## Eating / Dying / Reproducing ######################################## + +function eat!(wolf::Animal{Wolf}, sheep::Animal{Sheep}, w::World) + wolf.energy += sheep.energy * wolf.Δenergy + kill_agent!(sheep,w) +end +function eat!(sheep::Animal{Sheep}, grass::Plant{Grass}, ::World) + sheep.energy += grass.size * sheep.Δenergy + grass.size = 0 +end +eat!(::Animal, ::Nothing, ::World) = nothing + +kill_agent!(a::Agent, w::World) = delete!(w.agents, a.id) + +function find_agent(::Type{A}, w::World) where A<:Agent + dict = get(w.agents, tosym(A), nothing) + if !isnothing(dict) + as = dict |> values |> collect + isempty(as) ? nothing : rand(as) + else + nothing + end +end + +find_agent(::Type{P}, w::World) where P<:PlantSpecies = find_agent(Plant{P}, w) +find_agent(::Type{A}, w::World) where A<:AnimalSpecies = find_agent(Animal{A}, w) + +find_food(::Animal{Wolf}, w::World) = find_agent(Sheep, w) +find_food(::Animal{Sheep}, w::World) = find_agent(Grass, w) + +function find_mate(a::A, w::World) where A<:Animal + dict = get(w.agents, tosym(A), nothing) + if !isnothing(dict) + as = filter(x -> a.sex != x.sex, dict |> values |> collect) + isempty(as) ? 
nothing : rand(as) + else + nothing + end +end + +function reproduce!(a::Animal{A}, w::World) where A + m = find_mate(a,w) + if !isnothing(m) + a.energy = a.energy / 2 + new_id = w.max_id + 1 + ŝ = Animal{A}(new_id, a.energy, a.Δenergy, a.reprprob, a.foodprob, randsex()) + getfield(w.agents, tosym(ŝ))[ŝ.id] = ŝ + w.max_id = new_id + return ŝ + else + nothing + end +end + +########## Stepping through time ############################################# + +function agent_step!(p::Plant, ::World) + if p.size < p.max_size + p.size += 1 + end +end +function agent_step!(a::Animal, w::World) + a.energy -= 1 + if rand() <= a.foodprob + dinner = find_food(a,w) + eat!(a, dinner, w) + end + if a.energy <= 0 + kill_agent!(a,w) + return + end + if rand() <= a.reprprob + reproduce!(a,w) + end + return a +end + +function world_step!(world::World) + # make sure that we only iterate over IDs that already exist in the + # current timestep this lets us safely add agents + ids = copy(keys(world.agents)) + + for id in ids + # agents can be killed by other agents, so make sure that we are + # not stepping dead agents forward + !haskey(world.agents,id) && continue + + a = world.agents[id] + agent_step!(a,world) + end +end + + +########## Counting agents #################################################### + +agent_count(p::Plant) = p.size / p.max_size +agent_count(::Animal) = 1 +agent_count(as::Vector{<:Agent}) = sum(agent_count,as) + +function agent_count(w::World) + function op(d::Dict,a::A) where A<:Agent + if A in keys(d) + d[A] += agent_count(a) + else + d[A] = agent_count(a) + end + return d + end + foldl(op, w.agents |> values |> collect, init=Dict()) +end + +# for accessing NamedTuple in World +tosym(::T) where T<:Animal = tosym(T) + +# NOTE: needed for type stability +# TODO: do this with meta programming +tosym(::Type{Animal{Wolf}}) = Symbol("Wolf") +tosym(::Type{Animal{Sheep}}) = Symbol("Sheep") +tosym(::Type{Plant{Grass}}) = Symbol("Grass") diff --git 
a/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_NamedTupleDict/bench.jl b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_NamedTupleDict/bench.jl new file mode 100644 index 00000000..910e0ca5 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_NamedTupleDict/bench.jl @@ -0,0 +1,31 @@ +using BenchmarkTools +using Random +Random.seed!(0) + +include("Ecosystem.jl") + +sheep = Sheep(1,1,1,1,1,female) +sheep2 = Sheep(3001,1,1,1,1,male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) + +# check that something is returned +@info "check returns" find_food(sheep, world) reproduce!(sheep, world) + +# check type stability +@code_warntype find_food(sheep, world) + +sheep = Sheep(1,1,1,1,1,female) +sheep2 = Sheep(3001,1,1,1,1,male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) +@code_warntype reproduce!(sheep, world) + +# benchmark +sheep = Sheep(1,1,1,1,1,female) +sheep2 = Sheep(3001,1,1,1,1,male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) +@btime find_food($sheep, $world) + +sheep = Sheep(1,1,1,1,1,female) +sheep2 = Sheep(3001,1,1,1,1,male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) +@btime reproduce!($sheep, $world) diff --git a/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_NamedTupleDict/profview.jl b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_NamedTupleDict/profview.jl new file mode 100644 index 00000000..e977655d --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/ecosystems/animal_S_world_NamedTupleDict/profview.jl @@ -0,0 +1,26 @@ +using BenchmarkTools + +include("Ecosystem.jl") + +function make_counter() + n = 0 + counter() = n += 1 +end + +function create_world() + n_grass = 1_000 + n_sheep = 40 + n_wolves = 4 + + nextid = make_counter() + + World(vcat( + [Grass(nextid()) for _ in 1:n_grass], + [Sheep(nextid()) for _ in 1:n_sheep], + [Wolf(nextid()) for _ in 1:n_wolves], + )) +end 
+world = create_world(); + +world_step!(world) +@profview for i=1:100 world_step!(world) end diff --git a/docs_vitepress/src/lectures/lecture_05/ecosystems/lab04-worldstep.png b/docs_vitepress/src/lectures/lecture_05/ecosystems/lab04-worldstep.png new file mode 100644 index 00000000..1c6de197 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_05/ecosystems/lab04-worldstep.png differ diff --git a/docs_vitepress/src/lectures/lecture_05/ecosystems/lab04/Ecosystem.jl b/docs_vitepress/src/lectures/lecture_05/ecosystems/lab04/Ecosystem.jl new file mode 100644 index 00000000..99cff0ff --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/ecosystems/lab04/Ecosystem.jl @@ -0,0 +1,199 @@ +using StatsBase + +abstract type Species end + +abstract type PlantSpecies <: Species end +abstract type Grass <: PlantSpecies end + +abstract type AnimalSpecies <: Species end +abstract type Sheep <: AnimalSpecies end +abstract type Wolf <: AnimalSpecies end + +abstract type Agent{S<:Species} end + +# instead of Symbols we can use an Enum for the sex field +# using an Enum here makes things easier to extend in case you +# need more than just binary sexes and is also more explicit than +# just a boolean +@enum Sex female male + +########## World ############################################################# + +mutable struct World{A<:Agent} + agents::Dict{Int,A} + max_id::Int +end + +function World(agents::Vector{<:Agent}) + max_id = maximum(a.id for a in agents) + World(Dict(a.id=>a for a in agents), max_id) +end + +# optional: overload Base.show +function Base.show(io::IO, w::World) + println(io, typeof(w)) + for (_,a) in w.agents + println(io," $a") + end +end + + +########## Animals ########################################################### + +mutable struct Animal{A<:AnimalSpecies} <: Agent{A} + const id::Int + energy::Float64 + const Δenergy::Float64 + const reprprob::Float64 + const foodprob::Float64 + const sex::Sex +end + +function 
(A::Type{<:AnimalSpecies})(id::Int,E::T,ΔE::T,pr::T,pf::T,s::Sex) where T + Animal{A}(id,E,ΔE,pr,pf,s) +end + +# get the per species defaults back +randsex() = rand(instances(Sex)) +Sheep(id; E=4.0, ΔE=0.2, pr=0.8, pf=0.6, s=randsex()) = Sheep(id, E, ΔE, pr, pf, s) +Wolf(id; E=10.0, ΔE=8.0, pr=0.1, pf=0.2, s=randsex()) = Wolf(id, E, ΔE, pr, pf, s) + + +function Base.show(io::IO, a::Animal{A}) where {A<:AnimalSpecies} + e = a.energy + d = a.Δenergy + pr = a.reprprob + pf = a.foodprob + s = a.sex == female ? "♀" : "♂" + print(io, "$A$s #$(a.id) E=$e ΔE=$d pr=$pr pf=$pf") +end + +# note that for new species we will only have to overload `show` on the +# abstract species/sex types like below! +Base.show(io::IO, ::Type{Sheep}) = print(io,"🐑") +Base.show(io::IO, ::Type{Wolf}) = print(io,"🐺") + + +########## Plants ############################################################# + +mutable struct Plant{P<:PlantSpecies} <: Agent{P} + const id::Int + size::Int + const max_size::Int +end + +# constructor for all Plant{<:PlantSpecies} callable as PlantSpecies(...) 
+(A::Type{<:PlantSpecies})(id, s, m) = Plant{A}(id,s,m) +(A::Type{<:PlantSpecies})(id, m) = (A::Type{<:PlantSpecies})(id,rand(1:m),m) + +# default specific for Grass +Grass(id; max_size=10) = Grass(id, rand(1:max_size), max_size) + +function Base.show(io::IO, p::Plant{P}) where P + x = p.size/p.max_size * 100 + print(io,"$P #$(p.id) $(round(Int,x))% grown") +end + +Base.show(io::IO, ::Type{Grass}) = print(io,"🌿") + + +########## Eating / Dying / Reproducing ######################################## + +function eat!(wolf::Animal{Wolf}, sheep::Animal{Sheep}, w::World) + wolf.energy += sheep.energy * wolf.Δenergy + kill_agent!(sheep,w) +end +function eat!(sheep::Animal{Sheep}, grass::Plant{Grass}, ::World) + sheep.energy += grass.size * sheep.Δenergy + grass.size = 0 +end +eat!(::Animal, ::Nothing, ::World) = nothing + +kill_agent!(a::Agent, w::World) = delete!(w.agents, a.id) + +function find_mate(a::Animal, w::World) + ms = filter(x->mates(x,a), w.agents |> values |> collect) + isempty(ms) ? nothing : sample(ms) +end +mates(a::Animal{A}, b::Animal{A}) where A<:AnimalSpecies = a.sex != b.sex +mates(::Agent, ::Agent) = false + +function reproduce!(a::Animal{A}, w::World) where A + m = find_mate(a,w) + if !isnothing(m) + a.energy = a.energy / 2 + vals = [getproperty(a,n) for n in fieldnames(Animal) if n ∉ [:id, :sex]] + new_id = w.max_id + 1 + ŝ = Animal{A}(new_id, vals..., randsex()) + w.agents[ŝ.id] = ŝ + w.max_id = new_id + return ŝ + end +end + +# finding food / who eats who +function find_food(a::Animal, w::World) + as = filter(x -> eats(a,x), w.agents |> values |> collect) + isempty(as) ? 
nothing : sample(as) +end +eats(::Animal{Sheep},g::Plant{Grass}) = g.size > 0 +eats(::Animal{Wolf},::Animal{Sheep}) = true +eats(::Agent,::Agent) = false + + +########## Stepping through time ############################################# + +function agent_step!(p::Plant, ::World) + if p.size < p.max_size + p.size += 1 + end +end +function agent_step!(a::Animal, w::World) + a.energy -= 1 + if rand() <= a.foodprob + dinner = find_food(a,w) + eat!(a, dinner, w) + end + if a.energy <= 0 + kill_agent!(a,w) + return + end + if rand() <= a.reprprob + reproduce!(a,w) + end + return a +end + +function world_step!(world::World) + # make sure that we only iterate over IDs that already exist in the + # current timestep this lets us safely add agents + ids = copy(keys(world.agents)) + + for id in ids + # agents can be killed by other agents, so make sure that we are + # not stepping dead agents forward + !haskey(world.agents,id) && continue + + a = world.agents[id] + agent_step!(a,world) + end +end + + +########## Counting agents #################################################### + +agent_count(p::Plant) = p.size / p.max_size +agent_count(::Animal) = 1 +agent_count(as::Vector{<:Agent}) = sum(agent_count,as) + +function agent_count(w::World) + function op(d::Dict,a::A) where A<:Agent + if A in keys(d) + d[A] += agent_count(a) + else + d[A] = agent_count(a) + end + return d + end + foldl(op, w.agents |> values |> collect, init=Dict()) +end diff --git a/docs_vitepress/src/lectures/lecture_05/ecosystems/lab04/bench.jl b/docs_vitepress/src/lectures/lecture_05/ecosystems/lab04/bench.jl new file mode 100644 index 00000000..5c01fcd6 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/ecosystems/lab04/bench.jl @@ -0,0 +1,27 @@ +using BenchmarkTools +using Random +Random.seed!(0) + +include("Ecosystem.jl") + +sheep = Sheep(1,1,1,1,1,female) +sheep2 = Sheep(3001,1,1,1,1,male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) + +# check that something is returned 
+@info "check returns" find_food(sheep, world) reproduce!(sheep, world) + +# check type stability +@code_warntype find_food(sheep, world) +@code_warntype reproduce!(sheep, world) + +# benchmark +sheep = Sheep(1,1,1,1,1,female) +sheep2 = Sheep(3001,1,1,1,1,male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) +@btime find_food($sheep, $world) + +sheep = Sheep(1,1,1,1,1,female) +sheep2 = Sheep(3001,1,1,1,1,male) +world = World(vcat([sheep,sheep2], [Grass(i) for i=2:3000])) +@btime reproduce!($sheep, $world) diff --git a/docs_vitepress/src/lectures/lecture_05/ecosystems/lab04/grass-sheep-wolf.jl b/docs_vitepress/src/lectures/lecture_05/ecosystems/lab04/grass-sheep-wolf.jl new file mode 100644 index 00000000..2fb35fb3 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/ecosystems/lab04/grass-sheep-wolf.jl @@ -0,0 +1,25 @@ +include("Ecosystem.jl") + +function make_counter() + n = 0 + counter() = n += 1 +end + +function create_world() + n_grass = 1_000 + n_sheep = 40 + n_wolves = 4 + + nextid = make_counter() + + World(vcat( + [Grass(nextid()) for _ in 1:n_grass], + [Sheep(nextid()) for _ in 1:n_sheep], + [Wolf(nextid()) for _ in 1:n_wolves], + )) +end +world = create_world(); + +world_step!(world) +world_step!(world) +@profview for i in 1:100 world_step!(world) end diff --git a/docs_vitepress/src/lectures/lecture_05/hw.md b/docs_vitepress/src/lectures/lecture_05/hw.md new file mode 100644 index 00000000..b905f6f3 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/hw.md @@ -0,0 +1,47 @@ +# Homework 5: Root finding of polynomials +This homework should test your ability to use the knowledge of benchmarking, profiling and others to improve an existing implementation of root finding methods for polynomials. The provided [code](https://github.com/JuliaTeachingCTU/Scientific-Programming-in-Julia/blob/master/docs/src/lecture_05/root_finding.jl) is of questionable quality. 
In spite of the artificial nature, it should simulate a situation in which you may find yourself quite often, as it represents some intermediate step of going from a simple script to something, that starts to resemble a package. + +## How to submit? +Put the modified `root_finding.jl` code inside `hw.jl`. Zip only this file (not its parent folder) and upload it to BRUTE. Your file should not use any dependency other than those already present in the `root_finding.jl`. + +::: danger Homework (2 points) + +Use profiler on the `find_root` function to find a piece of unnecessary code, that takes more time than the computation itself. The finding of roots with the polynomial + +```math +p(x) = (x - 3)(x - 2)(x - 1)x(x + 1)(x + 2)(x + 3) = x^7 - 14x^5 + 49x^3 - 36x +``` + +should not take more than `50μs` when running with the following parameters + +```julia +atol = 1e-12 +maxiter = 100 +stepsize = 0.95 + +x₀ = find_root(p, Bisection(), -5.0, 5.0, maxiter, stepsize, atol) +x₀ = find_root(p, Newton(), -5.0, 5.0, maxiter, stepsize, atol) +x₀ = find_root(p, Secant(), -5.0, 5.0, maxiter, stepsize, atol) +``` + +Remove obvious type instabilities in both `find_root` and `step!` functions. Each variable with "inferred" type `::Any` in `@code_warntype` will be penalized. + +**HINTS**: +- running the function repeatedly `1000x` helps in the profiler sampling +- focus on parts of the code that may have been used just for debugging purposes + +::: + +# Voluntary exercise + +::: danger Voluntary exercise< + +Use `Plots.jl` to plot the polynomial $p$ on the interval $[-5, 5]$ and visualize the progress/convergence of each method, with a dotted vertical line and a dot on the x-axis for each subsequent root approximation `x̃`. 
+ +**HINTS**: +- plotting scalar function `f` - `plot(r, f)`, where `r` is a range of `x` values at which we evaluate `f` +- updating an existing plot - either `plot!(plt, ...)` or `plot!(...)`, in the former case the plot lives in variable `plt` whereas in the latter we modify some implicit global variable +- plotting dots - for example with `scatter`/`scatter!` +- `plot([(1.0,2.0), (1.0,3.0)], ls=:dot)` will create a dotted line from position `(x=1.0,y=2.0)` to `(x=1.0,y=3.0)` + +::: \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/Manifest.toml b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/Manifest.toml new file mode 100644 index 00000000..0828e8f4 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/Manifest.toml @@ -0,0 +1,2035 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.9.1" +manifest_format = "2.0" +project_hash = "418a40e4d97703f580431bcc6afdf6c41b947700" + +[[deps.AbstractFFTs]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "d92ad398961a3ed262d8bf04a1a2b8340f915fef" +uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" +version = "1.5.0" +weakdeps = ["ChainRulesCore", "Test"] + + [deps.AbstractFFTs.extensions] + AbstractFFTsChainRulesCoreExt = "ChainRulesCore" + AbstractFFTsTestExt = "Test" + +[[deps.AbstractLattices]] +git-tree-sha1 = "f35684b7349da49fcc8a9e520e30e45dbb077166" +uuid = "398f06c4-4d28-53ec-89ca-5b2656b7603d" +version = "0.2.1" + +[[deps.AbstractTrees]] +git-tree-sha1 = "faa260e4cb5aba097a73fab382dd4b5819d8ec8c" +uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +version = "0.4.4" + +[[deps.Adapt]] +deps = ["LinearAlgebra", "Requires"] +git-tree-sha1 = "68c4c187a232e7abe00ac29e3b03e09af9d77317" +uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +version = "3.7.0" +weakdeps = ["StaticArrays"] + + [deps.Adapt.extensions] + AdaptStaticArraysExt = "StaticArrays" + +[[deps.Animations]] +deps = ["Colors"] +git-tree-sha1 = 
"e81c509d2c8e49592413bfb0bb3b08150056c79d" +uuid = "27a7e980-b3e6-11e9-2bcd-0b925532e340" +version = "0.4.1" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" + +[[deps.ArrayInterface]] +deps = ["Adapt", "LinearAlgebra", "Requires", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "f83ec24f76d4c8f525099b2ac475fc098138ec31" +uuid = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9" +version = "7.4.11" + + [deps.ArrayInterface.extensions] + ArrayInterfaceBandedMatricesExt = "BandedMatrices" + ArrayInterfaceBlockBandedMatricesExt = "BlockBandedMatrices" + ArrayInterfaceCUDAExt = "CUDA" + ArrayInterfaceGPUArraysCoreExt = "GPUArraysCore" + ArrayInterfaceStaticArraysCoreExt = "StaticArraysCore" + ArrayInterfaceTrackerExt = "Tracker" + + [deps.ArrayInterface.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" + CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" + GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527" + StaticArraysCore = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" + Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" + +[[deps.ArrayInterfaceCore]] +deps = ["LinearAlgebra", "SnoopPrecompile", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "e5f08b5689b1aad068e01751889f2f615c7db36d" +uuid = "30b0a656-2188-435a-8636-2ec0e6a096e2" +version = "0.1.29" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[deps.Automa]] +deps = ["TranscodingStreams"] +git-tree-sha1 = "ef9997b3d5547c48b41c7bd8899e812a917b409d" +uuid = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b" +version = "0.8.4" + +[[deps.AxisAlgorithms]] +deps = ["LinearAlgebra", "Random", "SparseArrays", "WoodburyMatrices"] +git-tree-sha1 = "66771c8d21c8ff5e3a93379480a2307ac36863f7" +uuid = "13072b0f-2c55-5437-9ae7-d433b7a33950" +version = "1.0.1" + +[[deps.AxisArrays]] +deps = ["Dates", "IntervalSets", "IterTools", "RangeArrays"] +git-tree-sha1 = "16351be62963a67ac4083f748fdb3cca58bfd52f" +uuid = 
"39de3d68-74b9-583c-8d2d-e117c070f3a9" +version = "0.4.7" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[deps.BitFlags]] +git-tree-sha1 = "43b1a4a8f797c1cddadf60499a8a077d4af2cd2d" +uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35" +version = "0.1.7" + +[[deps.BitTwiddlingConvenienceFunctions]] +deps = ["Static"] +git-tree-sha1 = "0c5f81f47bbbcf4aea7b2959135713459170798b" +uuid = "62783981-4cbd-42fc-bca8-16325de8dc4b" +version = "0.1.5" + +[[deps.Bzip2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2" +uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" +version = "1.0.8+0" + +[[deps.CEnum]] +git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.4.2" + +[[deps.CPUSummary]] +deps = ["CpuId", "IfElse", "PrecompileTools", "Static"] +git-tree-sha1 = "601f7e7b3d36f18790e2caf83a882d88e9b71ff1" +uuid = "2a0fbf3d-bb9c-48f3-b0a9-814d99fd7ab9" +version = "0.2.4" + +[[deps.CRC32c]] +uuid = "8bf52ea8-c179-5cab-976a-9e18b702a9bc" + +[[deps.CRlibm]] +deps = ["CRlibm_jll"] +git-tree-sha1 = "32abd86e3c2025db5172aa182b982debed519834" +uuid = "96374032-68de-5a5b-8d9e-752f78720389" +version = "1.0.1" + +[[deps.CRlibm_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "e329286945d0cfc04456972ea732551869af1cfc" +uuid = "4e9b3aee-d8a1-5a3d-ad8b-7d824db253f0" +version = "1.0.1+0" + +[[deps.Cairo]] +deps = ["Cairo_jll", "Colors", "Glib_jll", "Graphics", "Libdl", "Pango_jll"] +git-tree-sha1 = "d0b3f8b4ad16cb0a2988c6788646a5e6a17b6b1b" +uuid = "159f3aea-2a34-519c-b102-8c37f9878175" +version = "1.0.5" + +[[deps.CairoMakie]] +deps = ["Base64", "Cairo", "Colors", "FFTW", "FileIO", "FreeType", "GeometryBasics", "LinearAlgebra", "Makie", "PrecompileTools", "SHA"] +git-tree-sha1 = "74384dc4aba2b377e22703e849154252930c434d" +uuid = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +version = "0.10.11" + +[[deps.Cairo_jll]] +deps = 
["Artifacts", "Bzip2_jll", "CompilerSupportLibraries_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "4b859a208b2397a7a623a03449e4636bdb17bcf2" +uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" +version = "1.16.1+1" + +[[deps.Calculus]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "f641eb0a4f00c343bbc32346e1217b86f3ce9dad" +uuid = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9" +version = "0.5.1" + +[[deps.ChainRulesCore]] +deps = ["Compat", "LinearAlgebra"] +git-tree-sha1 = "e0af648f0692ec1691b5d094b8724ba1346281cf" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "1.18.0" +weakdeps = ["SparseArrays"] + + [deps.ChainRulesCore.extensions] + ChainRulesCoreSparseArraysExt = "SparseArrays" + +[[deps.CloseOpenIntervals]] +deps = ["Static", "StaticArrayInterface"] +git-tree-sha1 = "70232f82ffaab9dc52585e0dd043b5e0c6b714f1" +uuid = "fb6a15b2-703c-40df-9091-08a04967cfa9" +version = "0.1.12" + +[[deps.CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "cd67fc487743b2f0fd4380d4cbd3a24660d0eec8" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.3" + +[[deps.ColorBrewer]] +deps = ["Colors", "JSON", "Test"] +git-tree-sha1 = "61c5334f33d91e570e1d0c3eb5465835242582c4" +uuid = "a2cac450-b92f-5266-8821-25eda20663c8" +version = "0.4.0" + +[[deps.ColorSchemes]] +deps = ["ColorTypes", "ColorVectorSpace", "Colors", "FixedPointNumbers", "PrecompileTools", "Random"] +git-tree-sha1 = "67c1f244b991cad9b0aa4b7540fb758c2488b129" +uuid = "35d6a980-a343-548e-a6ea-1d62b119f2f4" +version = "3.24.0" + +[[deps.ColorTypes]] +deps = ["FixedPointNumbers", "Random"] +git-tree-sha1 = "eb7f0f8307f71fac7c606984ea5fb2817275d6e4" +uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" +version = "0.11.4" + +[[deps.ColorVectorSpace]] +deps = ["ColorTypes", "FixedPointNumbers", "LinearAlgebra", "SpecialFunctions", "Statistics", "TensorCore"] 
+git-tree-sha1 = "600cc5508d66b78aae350f7accdb58763ac18589" +uuid = "c3611d14-8923-5661-9e6a-0046d554d3a4" +version = "0.9.10" + +[[deps.Colors]] +deps = ["ColorTypes", "FixedPointNumbers", "Reexport"] +git-tree-sha1 = "fc08e5930ee9a4e03f84bfb5211cb54e7769758a" +uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" +version = "0.12.10" + +[[deps.Combinatorics]] +git-tree-sha1 = "08c8b6831dc00bfea825826be0bc8336fc369860" +uuid = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" +version = "1.0.2" + +[[deps.CommonSubexpressions]] +deps = ["MacroTools", "Test"] +git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.0" + +[[deps.Compat]] +deps = ["UUIDs"] +git-tree-sha1 = "8a62af3e248a8c4bad6b32cbbe663ae02275e32c" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "4.10.0" +weakdeps = ["Dates", "LinearAlgebra"] + + [deps.Compat.extensions] + CompatLinearAlgebraExt = "LinearAlgebra" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "1.0.2+0" + +[[deps.ConcurrentUtilities]] +deps = ["Serialization", "Sockets"] +git-tree-sha1 = "5372dbbf8f0bdb8c700db5367132925c0771ef7e" +uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" +version = "2.2.1" + +[[deps.ConstructionBase]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "c53fc348ca4d40d7b371e71fd52251839080cbc9" +uuid = "187b0558-2788-49d3-abe0-74a17ed4e7c9" +version = "1.5.4" +weakdeps = ["IntervalSets", "StaticArrays"] + + [deps.ConstructionBase.extensions] + ConstructionBaseIntervalSetsExt = "IntervalSets" + ConstructionBaseStaticArraysExt = "StaticArrays" + +[[deps.Contour]] +git-tree-sha1 = "d05d9e7b7aedff4e5b51a029dced05cfb6125781" +uuid = "d38c429a-6771-53c6-b99e-75d170b6e991" +version = "0.6.2" + +[[deps.CpuId]] +deps = ["Markdown"] +git-tree-sha1 = "fcbb72b032692610bfbdb15018ac16a36cf2e406" +uuid = "adafc99b-e345-5852-983c-f28acb93d879" +version = "0.3.1" + +[[deps.DataAPI]] +git-tree-sha1 = 
"8da84edb865b0b5b0100c0666a9bc9a0b71c553c" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.15.0" + +[[deps.DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "3dbd312d370723b6bb43ba9d02fc36abade4518d" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.15" + +[[deps.DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[deps.DelaunayTriangulation]] +deps = ["DataStructures", "EnumX", "ExactPredicates", "Random", "SimpleGraphs"] +git-tree-sha1 = "bea7984f7e09aeb28a3b071c420a0186cb4fabad" +uuid = "927a84f5-c5f4-47a5-9785-b46e178433df" +version = "0.8.8" + +[[deps.DelimitedFiles]] +deps = ["Mmap"] +git-tree-sha1 = "9e2f36d3c96a820c678f2f1f1782582fcf685bae" +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" +version = "1.9.1" + +[[deps.DiffResults]] +deps = ["StaticArraysCore"] +git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.1.0" + +[[deps.DiffRules]] +deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.15.1" + +[[deps.Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[deps.Distributions]] +deps = ["FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns", "Test"] +git-tree-sha1 = "3d5873f811f582873bb9871fc9c451784d5dc8c7" +uuid = "31c24e10-a181-5473-b8eb-7969acd0382f" +version = "0.25.102" + + [deps.Distributions.extensions] + DistributionsChainRulesCoreExt = "ChainRulesCore" + DistributionsDensityInterfaceExt = "DensityInterface" + + 
[deps.Distributions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d" + +[[deps.DocStringExtensions]] +deps = ["LibGit2"] +git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.9.3" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.DualNumbers]] +deps = ["Calculus", "NaNMath", "SpecialFunctions"] +git-tree-sha1 = "5837a837389fccf076445fce071c8ddaea35a566" +uuid = "fa6b7ba4-c1ee-5f82-b5fc-ecf0adba8f74" +version = "0.6.8" + +[[deps.EarCut_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "e3290f2d49e661fbd94046d7e3726ffcb2d41053" +uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5" +version = "2.2.4+0" + +[[deps.EnumX]] +git-tree-sha1 = "bdb1942cd4c45e3c678fd11569d5cccd80976237" +uuid = "4e289a0a-7415-4d19-859d-a7e5c4648b56" +version = "1.0.4" + +[[deps.EpollShim_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "8e9441ee83492030ace98f9789a654a6d0b1f643" +uuid = "2702e6a9-849d-5ed8-8c21-79e8b8f9ee43" +version = "0.0.20230411+0" + +[[deps.ErrorfreeArithmetic]] +git-tree-sha1 = "d6863c556f1142a061532e79f611aa46be201686" +uuid = "90fa49ef-747e-5e6f-a989-263ba693cf1a" +version = "0.5.2" + +[[deps.ExactPredicates]] +deps = ["IntervalArithmetic", "Random", "StaticArraysCore"] +git-tree-sha1 = "499b1ca78f6180c8f8bdf1cabde2d39120229e5c" +uuid = "429591f6-91af-11e9-00e2-59fbe8cec110" +version = "2.2.6" + +[[deps.ExceptionUnwrapping]] +deps = ["Test"] +git-tree-sha1 = "e90caa41f5a86296e014e148ee061bd6c3edec96" +uuid = "460bff9d-24e4-43bc-9d9f-a8973cb893f4" +version = "0.1.9" + +[[deps.Expat_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "4558ab818dcceaab612d1bb8c19cee87eda2b83c" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.5.0+0" + 
+[[deps.Extents]] +git-tree-sha1 = "2140cd04483da90b2da7f99b2add0750504fc39c" +uuid = "411431e0-e8b7-467b-b5e0-f676ba4f2910" +version = "0.1.2" + +[[deps.FFMPEG]] +deps = ["FFMPEG_jll"] +git-tree-sha1 = "b57e3acbe22f8484b4b5ff66a7499717fe1a9cc8" +uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a" +version = "0.4.1" + +[[deps.FFMPEG_jll]] +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "PCRE2_jll", "Pkg", "Zlib_jll", "libaom_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] +git-tree-sha1 = "74faea50c1d007c85837327f6775bea60b5492dd" +uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5" +version = "4.4.2+2" + +[[deps.FFTW]] +deps = ["AbstractFFTs", "FFTW_jll", "LinearAlgebra", "MKL_jll", "Preferences", "Reexport"] +git-tree-sha1 = "b4fbdd20c889804969571cc589900803edda16b7" +uuid = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" +version = "1.7.1" + +[[deps.FFTW_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "c6033cc3892d0ef5bb9cd29b7f2f0331ea5184ea" +uuid = "f5851436-0d7a-5f13-b9de-f02708fd171a" +version = "3.3.10+0" + +[[deps.FastRounding]] +deps = ["ErrorfreeArithmetic", "LinearAlgebra"] +git-tree-sha1 = "6344aa18f654196be82e62816935225b3b9abe44" +uuid = "fa42c844-2597-5d31-933b-ebd51ab2693f" +version = "0.3.1" + +[[deps.FileIO]] +deps = ["Pkg", "Requires", "UUIDs"] +git-tree-sha1 = "299dc33549f68299137e51e6d49a13b5b1da9673" +uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +version = "1.16.1" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + +[[deps.FillArrays]] +deps = ["LinearAlgebra", "Random"] +git-tree-sha1 = "35f0c0f345bff2c6d636f95fdb136323b5a796ef" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "1.7.0" +weakdeps = ["SparseArrays", "Statistics"] + + [deps.FillArrays.extensions] + FillArraysSparseArraysExt = "SparseArrays" + FillArraysStatisticsExt = "Statistics" + +[[deps.FiniteDiff]] +deps = 
["ArrayInterface", "LinearAlgebra", "Requires", "Setfield", "SparseArrays"] +git-tree-sha1 = "c6e4a1fbe73b31a3dea94b1da449503b8830c306" +uuid = "6a86dc24-6348-571c-b903-95158fe2bd41" +version = "2.21.1" + + [deps.FiniteDiff.extensions] + FiniteDiffBandedMatricesExt = "BandedMatrices" + FiniteDiffBlockBandedMatricesExt = "BlockBandedMatrices" + FiniteDiffStaticArraysExt = "StaticArrays" + + [deps.FiniteDiff.weakdeps] + BandedMatrices = "aae01518-5342-5314-be14-df237901396f" + BlockBandedMatrices = "ffab5731-97b5-5995-9138-79e8c1846df0" + StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" + +[[deps.FixedPointNumbers]] +deps = ["Statistics"] +git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc" +uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" +version = "0.8.4" + +[[deps.Fontconfig_jll]] +deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "21efd19106a55620a188615da6d3d06cd7f6ee03" +uuid = "a3f928ae-7b40-5064-980b-68af3947d34b" +version = "2.13.93+0" + +[[deps.Formatting]] +deps = ["Printf"] +git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8" +uuid = "59287772-0a20-5a39-b81b-1366585eb4c0" +version = "0.4.2" + +[[deps.ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions"] +git-tree-sha1 = "cf0fe81336da9fb90944683b8c41984b08793dad" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.36" +weakdeps = ["StaticArrays"] + + [deps.ForwardDiff.extensions] + ForwardDiffStaticArraysExt = "StaticArrays" + +[[deps.FreeType]] +deps = ["CEnum", "FreeType2_jll"] +git-tree-sha1 = "50351f83f95282cf903e968d7c6e8d44a5f83d0b" +uuid = "b38be410-82b0-50bf-ab77-7b57e271db43" +version = "4.1.0" + +[[deps.FreeType2_jll]] +deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Zlib_jll"] +git-tree-sha1 = "d8db6a5a2fe1381c1ea4ef2cab7c69c2de7f9ea0" +uuid = 
"d7e528f0-a631-5988-bf34-fe36492bcfd7" +version = "2.13.1+0" + +[[deps.FreeTypeAbstraction]] +deps = ["ColorVectorSpace", "Colors", "FreeType", "GeometryBasics"] +git-tree-sha1 = "38a92e40157100e796690421e34a11c107205c86" +uuid = "663a7486-cb36-511b-a19d-713bb74d65c9" +version = "0.10.0" + +[[deps.FriBidi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "aa31987c2ba8704e23c6c8ba8a4f769d5d7e4f91" +uuid = "559328eb-81f9-559d-9380-de523a88c83c" +version = "1.0.10+0" + +[[deps.Future]] +deps = ["Random"] +uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" + +[[deps.GLFW_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "Xorg_libXcursor_jll", "Xorg_libXi_jll", "Xorg_libXinerama_jll", "Xorg_libXrandr_jll"] +git-tree-sha1 = "d972031d28c8c8d9d7b41a536ad7bb0c2579caca" +uuid = "0656b61e-2033-5cc2-a64a-77c0f6c09b89" +version = "3.3.8+0" + +[[deps.GPUArraysCore]] +deps = ["Adapt"] +git-tree-sha1 = "2d6ca471a6c7b536127afccfa7564b5b39227fe0" +uuid = "46192b85-c4d5-4398-a991-12ede77f4527" +version = "0.1.5" + +[[deps.GR]] +deps = ["Artifacts", "Base64", "DelimitedFiles", "Downloads", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Pkg", "Preferences", "Printf", "Random", "Serialization", "Sockets", "TOML", "Tar", "Test", "UUIDs", "p7zip_jll"] +git-tree-sha1 = "27442171f28c952804dede8ff72828a96f2bfc1f" +uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" +version = "0.72.10" + +[[deps.GR_jll]] +deps = ["Artifacts", "Bzip2_jll", "Cairo_jll", "FFMPEG_jll", "Fontconfig_jll", "FreeType2_jll", "GLFW_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pixman_jll", "Qt6Base_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "025d171a2847f616becc0f84c8dc62fe18f0f6dd" +uuid = "d2c73de3-f751-5644-a686-071e5b155ba9" +version = "0.72.10+0" + +[[deps.GeoInterface]] +deps = ["Extents"] +git-tree-sha1 = "d53480c0793b13341c40199190f92c611aa2e93c" +uuid = "cf35fbd7-0cd7-5166-be24-54bfbe79505f" +version = "1.3.2" + +[[deps.GeometryBasics]] 
+deps = ["EarCut_jll", "Extents", "GeoInterface", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"] +git-tree-sha1 = "424a5a6ce7c5d97cca7bcc4eac551b97294c54af" +uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326" +version = "0.4.9" + +[[deps.Gettext_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "9b02998aba7bf074d14de89f9d37ca24a1a0b046" +uuid = "78b55507-aeef-58d4-861c-77aaff3498b1" +version = "0.21.0+0" + +[[deps.Glib_jll]] +deps = ["Artifacts", "Gettext_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE2_jll", "Zlib_jll"] +git-tree-sha1 = "e94c92c7bf4819685eb80186d51c43e71d4afa17" +uuid = "7746bdde-850d-59dc-9ae8-88ece973131d" +version = "2.76.5+0" + +[[deps.Graphics]] +deps = ["Colors", "LinearAlgebra", "NaNMath"] +git-tree-sha1 = "d61890399bc535850c4bf08e4e0d3a7ad0f21cbd" +uuid = "a2bd30eb-e257-5431-a919-1863eab51364" +version = "1.1.2" + +[[deps.Graphite2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "344bf40dcab1073aca04aa0df4fb092f920e4011" +uuid = "3b182d85-2403-5c21-9c21-1e1f0cc25472" +version = "1.3.14+0" + +[[deps.GridLayoutBase]] +deps = ["GeometryBasics", "InteractiveUtils", "Observables"] +git-tree-sha1 = "f57a64794b336d4990d90f80b147474b869b1bc4" +uuid = "3955a311-db13-416c-9275-1d80ed98e5e9" +version = "0.9.2" + +[[deps.Grisu]] +git-tree-sha1 = "53bb909d1151e57e2484c3d1b53e19552b887fb2" +uuid = "42e2da0e-8278-4e71-bc24-59509adca0fe" +version = "1.0.2" + +[[deps.HTTP]] +deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] +git-tree-sha1 = "5eab648309e2e060198b45820af1a37182de3cce" +uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" +version = "1.10.0" + +[[deps.HarfBuzz_jll]] +deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", 
"FreeType2_jll", "Glib_jll", "Graphite2_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg"] +git-tree-sha1 = "129acf094d168394e80ee1dc4bc06ec835e510a3" +uuid = "2e76f6c2-a576-52d4-95c1-20adfe4de566" +version = "2.8.1+1" + +[[deps.HostCPUFeatures]] +deps = ["BitTwiddlingConvenienceFunctions", "IfElse", "Libdl", "Static"] +git-tree-sha1 = "eb8fed28f4994600e29beef49744639d985a04b2" +uuid = "3e5b6fbb-0976-4d2c-9146-d79de83f2fb0" +version = "0.1.16" + +[[deps.HypergeometricFunctions]] +deps = ["DualNumbers", "LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"] +git-tree-sha1 = "f218fe3736ddf977e0e772bc9a586b2383da2685" +uuid = "34004b35-14d8-5ef3-9330-4cdb6864b03a" +version = "0.3.23" + +[[deps.IfElse]] +git-tree-sha1 = "debdd00ffef04665ccbb3e150747a77560e8fad1" +uuid = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173" +version = "0.1.1" + +[[deps.ImageAxes]] +deps = ["AxisArrays", "ImageBase", "ImageCore", "Reexport", "SimpleTraits"] +git-tree-sha1 = "2e4520d67b0cef90865b3ef727594d2a58e0e1f8" +uuid = "2803e5a7-5153-5ecf-9a86-9b4c37f5f5ac" +version = "0.6.11" + +[[deps.ImageBase]] +deps = ["ImageCore", "Reexport"] +git-tree-sha1 = "b51bb8cae22c66d0f6357e3bcb6363145ef20835" +uuid = "c817782e-172a-44cc-b673-b171935fbb9e" +version = "0.1.5" + +[[deps.ImageCore]] +deps = ["AbstractFFTs", "ColorVectorSpace", "Colors", "FixedPointNumbers", "Graphics", "MappedArrays", "MosaicViews", "OffsetArrays", "PaddedViews", "Reexport"] +git-tree-sha1 = "acf614720ef026d38400b3817614c45882d75500" +uuid = "a09fc81d-aa75-5fe9-8630-4744c3626534" +version = "0.9.4" + +[[deps.ImageIO]] +deps = ["FileIO", "IndirectArrays", "JpegTurbo", "LazyModules", "Netpbm", "OpenEXR", "PNGFiles", "QOI", "Sixel", "TiffImages", "UUIDs"] +git-tree-sha1 = "bca20b2f5d00c4fbc192c3212da8fa79f4688009" +uuid = "82e4d734-157c-48bb-816b-45c225c6df19" +version = "0.6.7" + +[[deps.ImageMetadata]] +deps = ["AxisArrays", "ImageAxes", "ImageBase", "ImageCore"] +git-tree-sha1 = "355e2b974f2e3212a75dfb60519de21361ad3cb7" +uuid = 
"bc367c6b-8a6b-528e-b4bd-a4b897500b49" +version = "0.9.9" + +[[deps.Imath_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "3d09a9f60edf77f8a4d99f9e015e8fbf9989605d" +uuid = "905a6f67-0a94-5f89-b386-d35d92009cd1" +version = "3.1.7+0" + +[[deps.IndirectArrays]] +git-tree-sha1 = "012e604e1c7458645cb8b436f8fba789a51b257f" +uuid = "9b13fd28-a010-5f03-acff-a1bbcff69959" +version = "1.0.0" + +[[deps.Inflate]] +git-tree-sha1 = "ea8031dea4aff6bd41f1df8f2fdfb25b33626381" +uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" +version = "0.1.4" + +[[deps.IntegerMathUtils]] +git-tree-sha1 = "b8ffb903da9f7b8cf695a8bead8e01814aa24b30" +uuid = "18e54dd8-cb9d-406c-a71d-865a43cbb235" +version = "0.1.2" + +[[deps.IntelOpenMP_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ad37c091f7d7daf900963171600d7c1c5c3ede32" +uuid = "1d5cc7b8-4909-519e-a0f8-d0f5ad9712d0" +version = "2023.2.0+0" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[deps.Interpolations]] +deps = ["Adapt", "AxisAlgorithms", "ChainRulesCore", "LinearAlgebra", "OffsetArrays", "Random", "Ratios", "Requires", "SharedArrays", "SparseArrays", "StaticArrays", "WoodburyMatrices"] +git-tree-sha1 = "721ec2cf720536ad005cb38f50dbba7b02419a15" +uuid = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" +version = "0.14.7" + +[[deps.IntervalArithmetic]] +deps = ["CRlibm", "EnumX", "FastRounding", "LinearAlgebra", "Markdown", "Random", "RecipesBase", "RoundingEmulator", "SetRounding", "StaticArrays"] +git-tree-sha1 = "f59e639916283c1d2e106d2b00910b50f4dab76c" +uuid = "d1acc4aa-44c8-5952-acd4-ba5d80a2a253" +version = "0.21.2" + +[[deps.IntervalSets]] +deps = ["Dates", "Random"] +git-tree-sha1 = "3d8866c029dd6b16e69e0d4a939c4dfcb98fac47" +uuid = "8197267c-284f-5f27-9208-e0e47529a953" +version = "0.7.8" +weakdeps = ["Statistics"] + + [deps.IntervalSets.extensions] + IntervalSetsStatisticsExt = "Statistics" + +[[deps.IrrationalConstants]] +git-tree-sha1 = 
"630b497eafcc20001bba38a4651b327dcfc491d2" +uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" +version = "0.2.2" + +[[deps.Isoband]] +deps = ["isoband_jll"] +git-tree-sha1 = "f9b6d97355599074dc867318950adaa6f9946137" +uuid = "f1662d9f-8043-43de-a69a-05efc1cc6ff4" +version = "0.1.1" + +[[deps.IterTools]] +git-tree-sha1 = "4ced6667f9974fc5c5943fa5e2ef1ca43ea9e450" +uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" +version = "1.8.0" + +[[deps.IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[deps.JLFzf]] +deps = ["Pipe", "REPL", "Random", "fzf_jll"] +git-tree-sha1 = "9fb0b890adab1c0a4a475d4210d51f228bfc250d" +uuid = "1019f520-868f-41f5-a6de-eb00f4b6a39c" +version = "0.1.6" + +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.5.0" + +[[deps.JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.4" + +[[deps.JpegTurbo]] +deps = ["CEnum", "FileIO", "ImageCore", "JpegTurbo_jll", "TOML"] +git-tree-sha1 = "d65930fa2bc96b07d7691c652d701dcbe7d9cf0b" +uuid = "b835a17e-a41a-41e7-81f0-2f016b05efe0" +version = "0.1.4" + +[[deps.JpegTurbo_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "6f2675ef130a300a112286de91973805fcc5ffbc" +uuid = "aacddb02-875f-59d6-b918-886e6ef4fbf8" +version = "2.1.91+0" + +[[deps.KernelDensity]] +deps = ["Distributions", "DocStringExtensions", "FFTW", "Interpolations", "StatsBase"] +git-tree-sha1 = "90442c50e202a5cdf21a7899c66b240fdef14035" +uuid = "5ab0869b-81aa-558d-bb23-cbf5423bbe9b" +version = "0.6.7" + +[[deps.LAME_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "f6250b16881adf048549549fba48b1161acdac8c" +uuid = "c1c5ebd0-6772-5130-a774-d5fcae4a789d" 
+version = "3.100.1+0" + +[[deps.LERC_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "bf36f528eec6634efc60d7ec062008f171071434" +uuid = "88015f11-f218-50d7-93a8-a6af411a945d" +version = "3.0.0+1" + +[[deps.LLVMOpenMP_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "f689897ccbe049adb19a065c495e75f372ecd42b" +uuid = "1d63c593-3942-5779-bab2-d838dc0a180e" +version = "15.0.4+0" + +[[deps.LZO_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "e5b909bcf985c5e2605737d2ce278ed791b89be6" +uuid = "dd4b983a-f0e5-5f8d-a1b7-129d4a5fb1ac" +version = "2.10.1+0" + +[[deps.LaTeXStrings]] +git-tree-sha1 = "f2355693d6778a178ade15952b7ac47a4ff97996" +uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" +version = "1.3.0" + +[[deps.Latexify]] +deps = ["Formatting", "InteractiveUtils", "LaTeXStrings", "MacroTools", "Markdown", "OrderedCollections", "Printf", "Requires"] +git-tree-sha1 = "f428ae552340899a935973270b8d98e5a31c49fe" +uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" +version = "0.16.1" + + [deps.Latexify.extensions] + DataFramesExt = "DataFrames" + SymEngineExt = "SymEngine" + + [deps.Latexify.weakdeps] + DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" + SymEngine = "123dc426-2d89-5057-bbad-38513e3affd8" + +[[deps.LayoutPointers]] +deps = ["ArrayInterface", "LinearAlgebra", "ManualMemory", "SIMDTypes", "Static", "StaticArrayInterface"] +git-tree-sha1 = "88b8f66b604da079a627b6fb2860d3704a6729a1" +uuid = "10f19ff3-798f-405d-979b-55457f8fc047" +version = "0.1.14" + +[[deps.LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" + +[[deps.LazyModules]] +git-tree-sha1 = "a560dd966b386ac9ae60bdd3a3d3a326062d3c3e" +uuid = "8cdb02fc-e678-4876-92c5-9defec4f444e" +version = "0.3.1" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.3" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", 
"Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "7.84.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.10.2+0" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[deps.Libffi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "0b4a5d71f3e5200a7dff793393e09dfc2d874290" +uuid = "e9f186c6-92d2-5b65-8a66-fee21dc1b490" +version = "3.2.2+1" + +[[deps.Libgcrypt_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll", "Pkg"] +git-tree-sha1 = "64613c82a59c120435c067c2b809fc61cf5166ae" +uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4" +version = "1.8.7+0" + +[[deps.Libglvnd_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll"] +git-tree-sha1 = "6f73d1dd803986947b2c750138528a999a6c7733" +uuid = "7e76a0d4-f3c7-5321-8279-8d96eeed0f29" +version = "1.6.0+0" + +[[deps.Libgpg_error_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "c333716e46366857753e273ce6a69ee0945a6db9" +uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8" +version = "1.42.0+0" + +[[deps.Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "f9557a255370125b405568f9767d6d195822a175" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.17.0+0" + +[[deps.Libmount_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "9c30530bf0effd46e15e0fdcf2b8636e78cbbd73" +uuid = "4b2f31a3-9ecc-558c-b454-b3730dcb73e9" +version = "2.35.0+0" + +[[deps.Libtiff_jll]] +deps = ["Artifacts", "JLLWrappers", "JpegTurbo_jll", "LERC_jll", "Libdl", "XZ_jll", "Zlib_jll", "Zstd_jll"] +git-tree-sha1 = "2da088d113af58221c52828a80378e16be7d037a" +uuid = "89763e89-9b03-5906-acba-b20f662cd828" +version = "4.5.1+1" 
+ +[[deps.Libuuid_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "7f3efec06033682db852f8b3bc3c1d2b0a0ab066" +uuid = "38a345b3-de98-5d2b-a5d3-14cd9215e700" +version = "2.36.0+0" + +[[deps.LightXML]] +deps = ["Libdl", "XML2_jll"] +git-tree-sha1 = "e129d9391168c677cd4800f5c0abb1ed8cb3794f" +uuid = "9c8b4983-aa76-5018-a973-4c85ecc9e179" +version = "0.9.0" + +[[deps.LineSearches]] +deps = ["LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "Printf"] +git-tree-sha1 = "7bbea35cec17305fc70a0e5b4641477dc0789d9d" +uuid = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" +version = "7.2.0" + +[[deps.LinearAlgebra]] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[deps.LinearAlgebraX]] +deps = ["LinearAlgebra", "Mods", "Permutations", "Primes", "SimplePolynomials"] +git-tree-sha1 = "558a338f1eeabe933f9c2d4052aa7c2c707c3d52" +uuid = "9b3f67b0-2d00-526e-9884-9e4938f8fb88" +version = "0.1.12" + +[[deps.LogExpFunctions]] +deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] +git-tree-sha1 = "7d6dd4e9212aebaeed356de34ccf262a3cd415aa" +uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +version = "0.3.26" + + [deps.LogExpFunctions.extensions] + LogExpFunctionsChainRulesCoreExt = "ChainRulesCore" + LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables" + LogExpFunctionsInverseFunctionsExt = "InverseFunctions" + + [deps.LogExpFunctions.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" + InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[deps.LoggingExtras]] +deps = ["Dates", "Logging"] +git-tree-sha1 = "c1dd6d7978c12545b4179fb6153b9250c96b0075" +uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" +version = "1.0.3" + +[[deps.LoopVectorization]] +deps = ["ArrayInterface", "ArrayInterfaceCore", "CPUSummary", "CloseOpenIntervals", 
"DocStringExtensions", "HostCPUFeatures", "IfElse", "LayoutPointers", "LinearAlgebra", "OffsetArrays", "PolyesterWeave", "PrecompileTools", "SIMDTypes", "SLEEFPirates", "Static", "StaticArrayInterface", "ThreadingUtilities", "UnPack", "VectorizationBase"] +git-tree-sha1 = "c88a4afe1703d731b1c4fdf4e3c7e77e3b176ea2" +uuid = "bdcacae8-1622-11e9-2a5c-532679323890" +version = "0.12.165" +weakdeps = ["ChainRulesCore", "ForwardDiff", "SpecialFunctions"] + + [deps.LoopVectorization.extensions] + ForwardDiffExt = ["ChainRulesCore", "ForwardDiff"] + SpecialFunctionsExt = "SpecialFunctions" + +[[deps.MKL_jll]] +deps = ["Artifacts", "IntelOpenMP_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"] +git-tree-sha1 = "eb006abbd7041c28e0d16260e50a24f8f9104913" +uuid = "856f044c-d86e-5d09-b602-aeab76dc8ba7" +version = "2023.2.0+0" + +[[deps.MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "9ee1618cbf5240e6d4e0371d6f24065083f60c48" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.11" + +[[deps.Makie]] +deps = ["Animations", "Base64", "CRC32c", "ColorBrewer", "ColorSchemes", "ColorTypes", "Colors", "Contour", "DelaunayTriangulation", "Distributions", "DocStringExtensions", "Downloads", "FFMPEG_jll", "FileIO", "FixedPointNumbers", "Formatting", "FreeType", "FreeTypeAbstraction", "GeometryBasics", "GridLayoutBase", "ImageIO", "InteractiveUtils", "IntervalSets", "Isoband", "KernelDensity", "LaTeXStrings", "LinearAlgebra", "MacroTools", "MakieCore", "Markdown", "Match", "MathTeXEngine", "Observables", "OffsetArrays", "Packing", "PlotUtils", "PolygonOps", "PrecompileTools", "Printf", "REPL", "Random", "RelocatableFolders", "Setfield", "ShaderAbstractions", "Showoff", "SignedDistanceFields", "SparseArrays", "StableHashTraits", "Statistics", "StatsBase", "StatsFuns", "StructArrays", "TriplotBase", "UnicodeFun"] +git-tree-sha1 = "1d16d20279a145119899b4205258332f0fbeaa94" +uuid = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" +version = "0.19.11" + +[[deps.MakieCore]] +deps 
= ["Observables", "REPL"] +git-tree-sha1 = "a94bf3fef9c690a2a4ac1d09d86a59ab89c7f8e4" +uuid = "20f20a25-4f0e-4fdf-b5d1-57303727442b" +version = "0.6.8" + +[[deps.ManualMemory]] +git-tree-sha1 = "bcaef4fc7a0cfe2cba636d84cda54b5e4e4ca3cd" +uuid = "d125e4d3-2237-4719-b19c-fa641b8a4667" +version = "0.1.8" + +[[deps.MappedArrays]] +git-tree-sha1 = "2dab0221fe2b0f2cb6754eaa743cc266339f527e" +uuid = "dbb5928d-eab1-5f90-85c2-b9b0edb7c900" +version = "0.4.2" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[deps.Match]] +git-tree-sha1 = "1d9bc5c1a6e7ee24effb93f175c9342f9154d97f" +uuid = "7eb4fadd-790c-5f42-8a69-bfa0b872bfbf" +version = "1.2.0" + +[[deps.MathTeXEngine]] +deps = ["AbstractTrees", "Automa", "DataStructures", "FreeTypeAbstraction", "GeometryBasics", "LaTeXStrings", "REPL", "RelocatableFolders", "Test", "UnicodeFun"] +git-tree-sha1 = "8f52dbaa1351ce4cb847d95568cb29e62a307d93" +uuid = "0a4f8689-d25c-4efe-a92b-7142dfc1aa53" +version = "0.5.6" + +[[deps.MbedTLS]] +deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "Random", "Sockets"] +git-tree-sha1 = "03a9b9718f5682ecb107ac9f7308991db4ce395b" +uuid = "739be429-bea8-5141-9913-cc70e7f3736d" +version = "1.1.7" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.2+0" + +[[deps.Measures]] +git-tree-sha1 = "c13304c81eec1ed3af7fc20e75fb6b26092a1102" +uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e" +version = "0.3.2" + +[[deps.Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "f66bdc5de519e8f8ae43bdc598782d35a25b1272" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "1.1.0" + +[[deps.Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[deps.Mods]] +git-tree-sha1 = "61be59e4daffff43a8cec04b5e0dc773cbb5db3a" +uuid = "7475f97c-0381-53b1-977b-4c60186c8d62" +version = "1.3.3" + +[[deps.MosaicViews]] +deps = ["MappedArrays", "OffsetArrays", "PaddedViews", "StackViews"] +git-tree-sha1 = 
"7b86a5d4d70a9f5cdf2dacb3cbe6d251d1a61dbe" +uuid = "e94cdb99-869f-56ef-bcf0-1ae2bcbe0389" +version = "0.3.4" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2022.10.11" + +[[deps.Multisets]] +git-tree-sha1 = "8d852646862c96e226367ad10c8af56099b4047e" +uuid = "3b2b4ff1-bcff-5658-a3ee-dbcf1ce5ac09" +version = "0.4.4" + +[[deps.NLSolversBase]] +deps = ["DiffResults", "Distributed", "FiniteDiff", "ForwardDiff"] +git-tree-sha1 = "a0b464d183da839699f4c79e7606d9d186ec172c" +uuid = "d41bc354-129a-5804-8e4c-c37616107c6c" +version = "7.8.3" + +[[deps.NaNMath]] +deps = ["OpenLibm_jll"] +git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "1.0.2" + +[[deps.Netpbm]] +deps = ["FileIO", "ImageCore", "ImageMetadata"] +git-tree-sha1 = "d92b107dbb887293622df7697a2223f9f8176fcd" +uuid = "f09324ee-3d7c-5217-9330-fc30815ba969" +version = "1.1.1" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.Observables]] +git-tree-sha1 = "6862738f9796b3edc1c09d0890afce4eca9e7e93" +uuid = "510215fc-4207-5dde-b226-833fc4488ee2" +version = "0.5.4" + +[[deps.OffsetArrays]] +deps = ["Adapt"] +git-tree-sha1 = "2ac17d29c523ce1cd38e27785a7d23024853a4bb" +uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +version = "1.12.10" + +[[deps.Ogg_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "887579a3eb005446d514ab7aeac5d1d027658b8f" +uuid = "e7412a2a-1a6e-54c0-be00-318e2571c051" +version = "1.3.5+1" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.21+4" + +[[deps.OpenEXR]] +deps = ["Colors", "FileIO", "OpenEXR_jll"] +git-tree-sha1 = "327f53360fdb54df7ecd01e96ef1983536d1e633" +uuid = "52e1d378-f018-4a11-a4be-720524705ac7" +version = "0.3.2" + +[[deps.OpenEXR_jll]] +deps = ["Artifacts", "Imath_jll", "JLLWrappers", "Libdl", 
"Zlib_jll"] +git-tree-sha1 = "a4ca623df1ae99d09bc9868b008262d0c0ac1e4f" +uuid = "18a262bb-aa17-5467-a713-aee519bc75cb" +version = "3.1.4+0" + +[[deps.OpenLibm_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "05823500-19ac-5b8b-9628-191a04bc5112" +version = "0.8.1+0" + +[[deps.OpenSSL]] +deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"] +git-tree-sha1 = "51901a49222b09e3743c65b8847687ae5fc78eb2" +uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c" +version = "1.4.1" + +[[deps.OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "a12e56c72edee3ce6b96667745e6cbbe5498f200" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "1.1.23+0" + +[[deps.OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.5+0" + +[[deps.Optim]] +deps = ["Compat", "FillArrays", "ForwardDiff", "LineSearches", "LinearAlgebra", "NLSolversBase", "NaNMath", "Parameters", "PositiveFactorizations", "Printf", "SparseArrays", "StatsBase"] +git-tree-sha1 = "01f85d9269b13fedc61e63cc72ee2213565f7a72" +uuid = "429524aa-4258-5aef-a3af-852621145aeb" +version = "1.7.8" + +[[deps.Opus_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "51a08fb14ec28da2ec7a927c4337e4332c2a4720" +uuid = "91d4177d-7536-5919-b921-800302f37372" +version = "1.3.2+0" + +[[deps.OrderedCollections]] +git-tree-sha1 = "2e73fe17cac3c62ad1aebe70d44c963c3cfdc3e3" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.6.2" + +[[deps.PCRE2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15" +version = "10.42.0+0" + +[[deps.PDMats]] +deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"] +git-tree-sha1 = "66b2fcd977db5329aa35cac121e5b94dd6472198" +uuid = "90014a1f-27ba-587c-ab20-58faa44d9150" +version = "0.11.28" + +[[deps.PNGFiles]] +deps = ["Base64", "CEnum", 
"ImageCore", "IndirectArrays", "OffsetArrays", "libpng_jll"] +git-tree-sha1 = "5ded86ccaf0647349231ed6c0822c10886d4a1ee" +uuid = "f57f5aa1-a3ce-4bc8-8ab9-96f992907883" +version = "0.4.1" + +[[deps.Packing]] +deps = ["GeometryBasics"] +git-tree-sha1 = "ec3edfe723df33528e085e632414499f26650501" +uuid = "19eb6ba3-879d-56ad-ad62-d5c202156566" +version = "0.5.0" + +[[deps.PaddedViews]] +deps = ["OffsetArrays"] +git-tree-sha1 = "0fac6313486baae819364c52b4f483450a9d793f" +uuid = "5432bcbf-9aad-5242-b902-cca2824c8663" +version = "0.5.12" + +[[deps.Pango_jll]] +deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "FriBidi_jll", "Glib_jll", "HarfBuzz_jll", "JLLWrappers", "Libdl"] +git-tree-sha1 = "4745216e94f71cb768d58330b059c9b76f32cb66" +uuid = "36c8627f-9965-5494-a995-c6b170f724f3" +version = "1.50.14+0" + +[[deps.Parameters]] +deps = ["OrderedCollections", "UnPack"] +git-tree-sha1 = "34c0e9ad262e5f7fc75b10a9952ca7692cfc5fbe" +uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" +version = "0.12.3" + +[[deps.Parsers]] +deps = ["Dates", "PrecompileTools", "UUIDs"] +git-tree-sha1 = "716e24b21538abc91f6205fd1d8363f39b442851" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "2.7.2" + +[[deps.Permutations]] +deps = ["Combinatorics", "LinearAlgebra", "Random"] +git-tree-sha1 = "25e2bb0973689836bf164ecb960762f1bb8794dd" +uuid = "2ae35dd2-176d-5d53-8349-f30d82d94d4f" +version = "0.4.17" + +[[deps.Pipe]] +git-tree-sha1 = "6842804e7867b115ca9de748a0cf6b364523c16d" +uuid = "b98c9c47-44ae-5843-9183-064241ee97a0" +version = "1.3.0" + +[[deps.Pixman_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LLVMOpenMP_jll", "Libdl"] +git-tree-sha1 = "64779bc4c9784fee475689a1752ef4d5747c5e87" +uuid = "30392449-352a-5448-841d-b1acce4e97dc" +version = "0.42.2+0" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", 
"p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.9.0" + +[[deps.PkgVersion]] +deps = ["Pkg"] +git-tree-sha1 = "f9501cc0430a26bc3d156ae1b5b0c1b47af4d6da" +uuid = "eebad327-c553-4316-9ea0-9fa01ccd7688" +version = "0.3.3" + +[[deps.PlotThemes]] +deps = ["PlotUtils", "Statistics"] +git-tree-sha1 = "1f03a2d339f42dca4a4da149c7e15e9b896ad899" +uuid = "ccf2f8ad-2431-5c83-bf29-c5338b663b6a" +version = "3.1.0" + +[[deps.PlotUtils]] +deps = ["ColorSchemes", "Colors", "Dates", "PrecompileTools", "Printf", "Random", "Reexport", "Statistics"] +git-tree-sha1 = "f92e1315dadf8c46561fb9396e525f7200cdc227" +uuid = "995b91a9-d308-5afd-9ec6-746e21dbc043" +version = "1.3.5" + +[[deps.Plots]] +deps = ["Base64", "Contour", "Dates", "Downloads", "FFMPEG", "FixedPointNumbers", "GR", "JLFzf", "JSON", "LaTeXStrings", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "Pkg", "PlotThemes", "PlotUtils", "PrecompileTools", "Preferences", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "RelocatableFolders", "Requires", "Scratch", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs", "UnicodeFun", "UnitfulLatexify", "Unzip"] +git-tree-sha1 = "ccee59c6e48e6f2edf8a5b64dc817b6729f99eb5" +uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +version = "1.39.0" + + [deps.Plots.extensions] + FileIOExt = "FileIO" + GeometryBasicsExt = "GeometryBasics" + IJuliaExt = "IJulia" + ImageInTerminalExt = "ImageInTerminal" + UnitfulExt = "Unitful" + + [deps.Plots.weakdeps] + FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" + GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326" + IJulia = "7073ff75-c697-5162-941a-fcdaad2a7d2a" + ImageInTerminal = "d8c32880-2388-543b-8c61-d9f865259254" + Unitful = "1986cc42-f94f-5a68-af5c-568840ba703d" + +[[deps.PolyesterWeave]] +deps = ["BitTwiddlingConvenienceFunctions", "CPUSummary", "IfElse", "Static", "ThreadingUtilities"] +git-tree-sha1 = "240d7170f5ffdb285f9427b92333c3463bf65bf6" +uuid = 
"1d0040c9-8b98-4ee7-8388-3f51789ca0ad" +version = "0.2.1" + +[[deps.PolygonOps]] +git-tree-sha1 = "77b3d3605fc1cd0b42d95eba87dfcd2bf67d5ff6" +uuid = "647866c9-e3ac-4575-94e7-e3d426903924" +version = "0.1.2" + +[[deps.Polynomials]] +deps = ["LinearAlgebra", "RecipesBase", "Setfield", "SparseArrays"] +git-tree-sha1 = "ea78a2764f31715093de7ab495e12c0187f231d1" +uuid = "f27b6e38-b328-58d1-80ce-0feddd5e7a45" +version = "4.0.4" + + [deps.Polynomials.extensions] + PolynomialsChainRulesCoreExt = "ChainRulesCore" + PolynomialsFFTWExt = "FFTW" + PolynomialsMakieCoreExt = "MakieCore" + PolynomialsMutableArithmeticsExt = "MutableArithmetics" + + [deps.Polynomials.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + FFTW = "7a1cc6ca-52ef-59f5-83cd-3a7055c09341" + MakieCore = "20f20a25-4f0e-4fdf-b5d1-57303727442b" + MutableArithmetics = "d8a4904e-b15c-11e9-3269-09a3773c0cb0" + +[[deps.PositiveFactorizations]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "17275485f373e6673f7e7f97051f703ed5b15b20" +uuid = "85a6dd25-e78a-55b7-8502-1745935b8125" +version = "0.2.4" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "03b4c25b43cb84cee5c90aa9b5ea0a78fd848d2f" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.2.0" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "00805cd429dcb4870060ff49ef443486c262e38e" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.1" + +[[deps.Primes]] +deps = ["IntegerMathUtils"] +git-tree-sha1 = "4c9f306e5d6603ae203c2000dd460d81a5251489" +uuid = "27ebfcd6-29c5-5fa9-bf4b-fb8fc14df3ae" +version = "0.5.4" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[deps.ProgressMeter]] +deps = ["Distributed", "Printf"] +git-tree-sha1 = "00099623ffee15972c16111bcf84c58a0051257c" +uuid = "92933f4c-e287-5a05-a399-4b506db050ca" +version = "1.9.0" + +[[deps.QOI]] +deps = ["ColorTypes", "FileIO", "FixedPointNumbers"] +git-tree-sha1 = "18e8f4d1426e965c7b532ddd260599e1510d26ce" 
+uuid = "4b34888f-f399-49d4-9bb3-47ed5cae4e65" +version = "1.0.0" + +[[deps.Qt6Base_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "OpenSSL_jll", "Vulkan_Loader_jll", "Xorg_libSM_jll", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Xorg_libxcb_jll", "Xorg_xcb_util_cursor_jll", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_keysyms_jll", "Xorg_xcb_util_renderutil_jll", "Xorg_xcb_util_wm_jll", "Zlib_jll", "libinput_jll", "xkbcommon_jll"] +git-tree-sha1 = "7c29f0e8c575428bd84dc3c72ece5178caa67336" +uuid = "c0090381-4147-56d7-9ebc-da0b1113ec56" +version = "6.5.2+2" + +[[deps.QuadGK]] +deps = ["DataStructures", "LinearAlgebra"] +git-tree-sha1 = "9ebcd48c498668c7fa0e97a9cae873fbee7bfee1" +uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc" +version = "2.9.1" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[deps.Random]] +deps = ["SHA", "Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[deps.RangeArrays]] +git-tree-sha1 = "b9039e93773ddcfc828f12aadf7115b4b4d225f5" +uuid = "b3c3ace0-ae52-54e7-9d0b-2c1406fd6b9d" +version = "0.3.2" + +[[deps.Ratios]] +deps = ["Requires"] +git-tree-sha1 = "1342a47bf3260ee108163042310d26f2be5ec90b" +uuid = "c84ed2f1-dad5-54f0-aa8e-dbefe2724439" +version = "0.4.5" +weakdeps = ["FixedPointNumbers"] + + [deps.Ratios.extensions] + RatiosFixedPointNumbersExt = "FixedPointNumbers" + +[[deps.RecipesBase]] +deps = ["PrecompileTools"] +git-tree-sha1 = "5c3d09cc4f31f5fc6af001c250bf1278733100ff" +uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" +version = "1.3.4" + +[[deps.RecipesPipeline]] +deps = ["Dates", "NaNMath", "PlotUtils", "PrecompileTools", "RecipesBase"] +git-tree-sha1 = "45cf9fd0ca5839d06ef333c8201714e888486342" +uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c" +version = "0.6.12" + +[[deps.Reexport]] +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" +uuid = 
"189a3867-3050-52da-a836-e630ba90ab69" +version = "1.2.2" + +[[deps.RelocatableFolders]] +deps = ["SHA", "Scratch"] +git-tree-sha1 = "ffdaf70d81cf6ff22c2b6e733c900c3321cab864" +uuid = "05181044-ff0b-4ac5-8273-598c1e38db00" +version = "1.0.1" + +[[deps.Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.3.0" + +[[deps.RingLists]] +deps = ["Random"] +git-tree-sha1 = "f39da63aa6d2d88e0c1bd20ed6a3ff9ea7171ada" +uuid = "286e9d63-9694-5540-9e3c-4e6708fa07b2" +version = "0.2.8" + +[[deps.Rmath]] +deps = ["Random", "Rmath_jll"] +git-tree-sha1 = "f65dcb5fa46aee0cf9ed6274ccbd597adc49aa7b" +uuid = "79098fc4-a85e-5d69-aa6a-4863f24498fa" +version = "0.7.1" + +[[deps.Rmath_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "6ed52fdd3382cf21947b15e8870ac0ddbff736da" +uuid = "f50d1b31-88e8-58de-be2c-1cc44531875f" +version = "0.4.0+0" + +[[deps.RoundingEmulator]] +git-tree-sha1 = "40b9edad2e5287e05bd413a38f61a8ff55b9557b" +uuid = "5eaf0fd0-dfba-4ccb-bf02-d820a40db705" +version = "0.2.1" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.SIMDTypes]] +git-tree-sha1 = "330289636fb8107c5f32088d2741e9fd7a061a5c" +uuid = "94e857df-77ce-4151-89e5-788b33177be4" +version = "0.1.0" + +[[deps.SLEEFPirates]] +deps = ["IfElse", "Static", "VectorizationBase"] +git-tree-sha1 = "4b8586aece42bee682399c4c4aee95446aa5cd19" +uuid = "476501e8-09a2-5ece-8869-fb82de89a1fa" +version = "0.6.39" + +[[deps.Scratch]] +deps = ["Dates"] +git-tree-sha1 = "30449ee12237627992a99d5e30ae63e4d78cd24a" +uuid = "6c6a2e73-6563-6170-7368-637461726353" +version = "1.2.0" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[deps.SetRounding]] +git-tree-sha1 = "d7a25e439d07a17b7cdf97eecee504c50fedf5f6" +uuid = "3cc68bcd-71a2-5612-b932-767ffbe40ab0" +version = "0.2.1" + +[[deps.Setfield]] +deps = ["ConstructionBase", "Future", 
"MacroTools", "StaticArraysCore"] +git-tree-sha1 = "e2cc6d8c88613c05e1defb55170bf5ff211fbeac" +uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46" +version = "1.1.1" + +[[deps.ShaderAbstractions]] +deps = ["ColorTypes", "FixedPointNumbers", "GeometryBasics", "LinearAlgebra", "Observables", "StaticArrays", "StructArrays", "Tables"] +git-tree-sha1 = "db0219befe4507878b1a90e07820fed3e62c289d" +uuid = "65257c39-d410-5151-9873-9b3e5be5013e" +version = "0.4.0" + +[[deps.SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[deps.Showoff]] +deps = ["Dates", "Grisu"] +git-tree-sha1 = "91eddf657aca81df9ae6ceb20b959ae5653ad1de" +uuid = "992d4aef-0814-514b-bc4d-f2e9a6c4116f" +version = "1.0.3" + +[[deps.SignedDistanceFields]] +deps = ["Random", "Statistics", "Test"] +git-tree-sha1 = "d263a08ec505853a5ff1c1ebde2070419e3f28e9" +uuid = "73760f76-fbc4-59ce-8f25-708e95d2df96" +version = "0.4.0" + +[[deps.SimpleBufferStream]] +git-tree-sha1 = "874e8867b33a00e784c8a7e4b60afe9e037b74e1" +uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7" +version = "1.1.0" + +[[deps.SimpleGraphs]] +deps = ["AbstractLattices", "Combinatorics", "DataStructures", "IterTools", "LightXML", "LinearAlgebra", "LinearAlgebraX", "Optim", "Primes", "Random", "RingLists", "SimplePartitions", "SimplePolynomials", "SimpleRandom", "SparseArrays", "Statistics"] +git-tree-sha1 = "b608903049d11cc557c45e03b3a53e9260579c19" +uuid = "55797a34-41de-5266-9ec1-32ac4eb504d3" +version = "0.8.4" + +[[deps.SimplePartitions]] +deps = ["AbstractLattices", "DataStructures", "Permutations"] +git-tree-sha1 = "dcc02923a53f316ab97da8ef3136e80b4543dbf1" +uuid = "ec83eff0-a5b5-5643-ae32-5cbf6eedec9d" +version = "0.3.0" + +[[deps.SimplePolynomials]] +deps = ["Mods", "Multisets", "Polynomials", "Primes"] +git-tree-sha1 = "d537c31cf9995236166e3e9afc424a5a1c59ff9d" +uuid = "cc47b68c-3164-5771-a705-2bc0097375a0" +version = "0.2.14" + +[[deps.SimpleRandom]] +deps = ["Distributions", 
"LinearAlgebra", "Random"] +git-tree-sha1 = "3a6fb395e37afab81aeea85bae48a4db5cd7244a" +uuid = "a6525b86-64cd-54fa-8f65-62fc48bdc0e8" +version = "0.3.1" + +[[deps.SimpleTraits]] +deps = ["InteractiveUtils", "MacroTools"] +git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231" +uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" +version = "0.9.4" + +[[deps.Sixel]] +deps = ["Dates", "FileIO", "ImageCore", "IndirectArrays", "OffsetArrays", "REPL", "libsixel_jll"] +git-tree-sha1 = "2da10356e31327c7096832eb9cd86307a50b1eb6" +uuid = "45858cf5-a6b0-47a3-bbea-62219f50df47" +version = "0.1.3" + +[[deps.SnoopPrecompile]] +deps = ["Preferences"] +git-tree-sha1 = "e760a70afdcd461cf01a575947738d359234665c" +uuid = "66db9d55-30c0-4569-8b51-7e840670fc0c" +version = "1.0.3" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[deps.SortingAlgorithms]] +deps = ["DataStructures"] +git-tree-sha1 = "5165dfb9fd131cf0c6957a3a7605dede376e7b63" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "1.2.0" + +[[deps.SparseArrays]] +deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[deps.SpecialFunctions]] +deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] +git-tree-sha1 = "e2cfc4012a19088254b3950b85c3c1d8882d864d" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "2.3.1" +weakdeps = ["ChainRulesCore"] + + [deps.SpecialFunctions.extensions] + SpecialFunctionsChainRulesCoreExt = "ChainRulesCore" + +[[deps.StableHashTraits]] +deps = ["Compat", "SHA", "Tables", "TupleTools"] +git-tree-sha1 = "30edbce1c797dc7d4c74bc07b2b6a57b891bead3" +uuid = "c5dd0088-6c3f-4803-b00e-f31a60c170fa" +version = "1.1.0" + +[[deps.StackViews]] +deps = ["OffsetArrays"] +git-tree-sha1 = "46e589465204cd0c08b4bd97385e4fa79a0c770c" +uuid = "cae243ae-269e-4f55-b966-ac2d0dc13c15" +version = "0.1.1" + +[[deps.Static]] +deps = ["IfElse"] +git-tree-sha1 = 
"f295e0a1da4ca425659c57441bcb59abb035a4bc" +uuid = "aedffcd0-7271-4cad-89d0-dc628f76c6d3" +version = "0.8.8" + +[[deps.StaticArrayInterface]] +deps = ["ArrayInterface", "Compat", "IfElse", "LinearAlgebra", "PrecompileTools", "Requires", "SparseArrays", "Static", "SuiteSparse"] +git-tree-sha1 = "03fec6800a986d191f64f5c0996b59ed526eda25" +uuid = "0d7ed370-da01-4f52-bd93-41d350b8b718" +version = "1.4.1" +weakdeps = ["OffsetArrays", "StaticArrays"] + + [deps.StaticArrayInterface.extensions] + StaticArrayInterfaceOffsetArraysExt = "OffsetArrays" + StaticArrayInterfaceStaticArraysExt = "StaticArrays" + +[[deps.StaticArrays]] +deps = ["LinearAlgebra", "Random", "StaticArraysCore"] +git-tree-sha1 = "0adf069a2a490c47273727e029371b31d44b72b2" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.6.5" +weakdeps = ["Statistics"] + + [deps.StaticArrays.extensions] + StaticArraysStatisticsExt = "Statistics" + +[[deps.StaticArraysCore]] +git-tree-sha1 = "36b3d696ce6366023a0ea192b4cd442268995a0d" +uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" +version = "1.4.2" + +[[deps.Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.9.0" + +[[deps.StatsAPI]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "1ff449ad350c9c4cbc756624d6f8a8c3ef56d3ed" +uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" +version = "1.7.0" + +[[deps.StatsBase]] +deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] +git-tree-sha1 = "1d77abd07f617c4868c33d4f5b9e1dbb2643c9cf" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.34.2" + +[[deps.StatsFuns]] +deps = ["HypergeometricFunctions", "IrrationalConstants", "LogExpFunctions", "Reexport", "Rmath", "SpecialFunctions"] +git-tree-sha1 = "f625d686d5a88bcd2b15cd81f18f98186fdc0c9a" +uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c" +version = "1.3.0" + + [deps.StatsFuns.extensions] + 
StatsFunsChainRulesCoreExt = "ChainRulesCore" + StatsFunsInverseFunctionsExt = "InverseFunctions" + + [deps.StatsFuns.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.StructArrays]] +deps = ["Adapt", "ConstructionBase", "DataAPI", "GPUArraysCore", "StaticArraysCore", "Tables"] +git-tree-sha1 = "0a3db38e4cce3c54fe7a71f831cd7b6194a54213" +uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" +version = "0.6.16" + +[[deps.SuiteSparse]] +deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"] +uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9" + +[[deps.SuiteSparse_jll]] +deps = ["Artifacts", "Libdl", "Pkg", "libblastrampoline_jll"] +uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "5.10.1+6" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.1" + +[[deps.Tables]] +deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "OrderedCollections", "TableTraits"] +git-tree-sha1 = "cb76cf677714c095e535e3501ac7954732aeea2d" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.11.1" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.TensorCore]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "1feb45f88d133a655e001435632f019a9a1bcdb6" +uuid = "62fd8b95-f654-4bbd-a8a5-9c27f68ccd50" +version = "0.1.1" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[deps.ThreadingUtilities]] +deps = ["ManualMemory"] +git-tree-sha1 = "eda08f7e9818eb53661b3deb74e3159460dfbc27" +uuid = "8290d209-cae3-49c0-8002-c8c24d57dab5" +version = "0.5.2" + +[[deps.TiffImages]] +deps = 
["ColorTypes", "DataStructures", "DocStringExtensions", "FileIO", "FixedPointNumbers", "IndirectArrays", "Inflate", "Mmap", "OffsetArrays", "PkgVersion", "ProgressMeter", "UUIDs"] +git-tree-sha1 = "34cc045dd0aaa59b8bbe86c644679bc57f1d5bd0" +uuid = "731e570b-9d59-4bfa-96dc-6df516fadf69" +version = "0.6.8" + +[[deps.TranscodingStreams]] +deps = ["Random", "Test"] +git-tree-sha1 = "9a6ae7ed916312b41236fcef7e0af564ef934769" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.9.13" + +[[deps.TriplotBase]] +git-tree-sha1 = "4d4ed7f294cda19382ff7de4c137d24d16adc89b" +uuid = "981d1d27-644d-49a2-9326-4793e63143c3" +version = "0.1.0" + +[[deps.TupleTools]] +git-tree-sha1 = "155515ed4c4236db30049ac1495e2969cc06be9d" +uuid = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6" +version = "1.4.3" + +[[deps.URIs]] +git-tree-sha1 = "67db6cc7b3821e19ebe75791a9dd19c9b1188f2b" +uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" +version = "1.5.1" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[deps.UnPack]] +git-tree-sha1 = "387c1f73762231e86e0c9c5443ce3b4a0a9a0c2b" +uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed" +version = "1.0.2" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[deps.UnicodeFun]] +deps = ["REPL"] +git-tree-sha1 = "53915e50200959667e78a92a418594b428dffddf" +uuid = "1cfade01-22cf-5700-b092-accc4b62d6e1" +version = "0.4.1" + +[[deps.Unitful]] +deps = ["Dates", "LinearAlgebra", "Random"] +git-tree-sha1 = "a72d22c7e13fe2de562feda8645aa134712a87ee" +uuid = "1986cc42-f94f-5a68-af5c-568840ba703d" +version = "1.17.0" + + [deps.Unitful.extensions] + ConstructionBaseUnitfulExt = "ConstructionBase" + InverseFunctionsUnitfulExt = "InverseFunctions" + + [deps.Unitful.weakdeps] + ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" + InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" + +[[deps.UnitfulLatexify]] +deps = ["LaTeXStrings", "Latexify", "Unitful"] +git-tree-sha1 = 
"e2d817cc500e960fdbafcf988ac8436ba3208bfd" +uuid = "45397f5d-5981-4c77-b2b3-fc36d6e9b728" +version = "1.6.3" + +[[deps.Unzip]] +git-tree-sha1 = "ca0969166a028236229f63514992fc073799bb78" +uuid = "41fe7b60-77ed-43a1-b4f0-825fd5a5650d" +version = "0.2.0" + +[[deps.VectorizationBase]] +deps = ["ArrayInterface", "CPUSummary", "HostCPUFeatures", "IfElse", "LayoutPointers", "Libdl", "LinearAlgebra", "SIMDTypes", "Static", "StaticArrayInterface"] +git-tree-sha1 = "b182207d4af54ac64cbc71797765068fdeff475d" +uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f" +version = "0.21.64" + +[[deps.Vulkan_Loader_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Wayland_jll", "Xorg_libX11_jll", "Xorg_libXrandr_jll", "xkbcommon_jll"] +git-tree-sha1 = "2f0486047a07670caad3a81a075d2e518acc5c59" +uuid = "a44049a8-05dd-5a78-86c9-5fde0876e88c" +version = "1.3.243+0" + +[[deps.Wayland_jll]] +deps = ["Artifacts", "EpollShim_jll", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "7558e29847e99bc3f04d6569e82d0f5c54460703" +uuid = "a2964d1f-97da-50d4-b82a-358c7fce9d89" +version = "1.21.0+1" + +[[deps.Wayland_protocols_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "4528479aa01ee1b3b4cd0e6faef0e04cf16466da" +uuid = "2381bf8a-dfd0-557d-9999-79630e7b1b91" +version = "1.25.0+0" + +[[deps.WoodburyMatrices]] +deps = ["LinearAlgebra", "SparseArrays"] +git-tree-sha1 = "de67fa59e33ad156a590055375a30b23c40299d3" +uuid = "efce3f68-66dc-5838-9240-27a6d6f5f9b6" +version = "0.5.5" + +[[deps.XML2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Zlib_jll"] +git-tree-sha1 = "24b81b59bd35b3c42ab84fa589086e19be919916" +uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" +version = "2.11.5+0" + +[[deps.XSLT_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Libgpg_error_jll", "Libiconv_jll", "Pkg", "XML2_jll", "Zlib_jll"] +git-tree-sha1 = "91844873c4085240b95e795f692c4cec4d805f8a" +uuid = 
"aed1982a-8fda-507f-9586-7b0439959a61" +version = "1.1.34+0" + +[[deps.XZ_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "cf2c7de82431ca6f39250d2fc4aacd0daa1675c0" +uuid = "ffd25f8a-64ca-5728-b0f7-c24cf3aae800" +version = "5.4.4+0" + +[[deps.Xorg_libICE_jll]] +deps = ["Libdl", "Pkg"] +git-tree-sha1 = "e5becd4411063bdcac16be8b66fc2f9f6f1e8fe5" +uuid = "f67eecfb-183a-506d-b269-f58e52b52d7c" +version = "1.0.10+1" + +[[deps.Xorg_libSM_jll]] +deps = ["Libdl", "Pkg", "Xorg_libICE_jll"] +git-tree-sha1 = "4a9d9e4c180e1e8119b5ffc224a7b59d3a7f7e18" +uuid = "c834827a-8449-5923-a945-d239c165b7dd" +version = "1.2.3+0" + +[[deps.Xorg_libX11_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libxcb_jll", "Xorg_xtrans_jll"] +git-tree-sha1 = "afead5aba5aa507ad5a3bf01f58f82c8d1403495" +uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc" +version = "1.8.6+0" + +[[deps.Xorg_libXau_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "6035850dcc70518ca32f012e46015b9beeda49d8" +uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec" +version = "1.0.11+0" + +[[deps.Xorg_libXcursor_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXfixes_jll", "Xorg_libXrender_jll"] +git-tree-sha1 = "12e0eb3bc634fa2080c1c37fccf56f7c22989afd" +uuid = "935fb764-8cf2-53bf-bb30-45bb1f8bf724" +version = "1.2.0+4" + +[[deps.Xorg_libXdmcp_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "34d526d318358a859d7de23da945578e8e8727b7" +uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05" +version = "1.1.4+0" + +[[deps.Xorg_libXext_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "b7c0aa8c376b31e4852b360222848637f481f8c3" +uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3" +version = "1.3.4+4" + +[[deps.Xorg_libXfixes_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "0e0dc7431e7a0587559f9294aeec269471c991a4" +uuid = "d091e8ba-531a-589c-9de9-94069b037ed8" +version = "5.0.3+4" 
+ +[[deps.Xorg_libXi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXfixes_jll"] +git-tree-sha1 = "89b52bc2160aadc84d707093930ef0bffa641246" +uuid = "a51aa0fd-4e3c-5386-b890-e753decda492" +version = "1.7.10+4" + +[[deps.Xorg_libXinerama_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll"] +git-tree-sha1 = "26be8b1c342929259317d8b9f7b53bf2bb73b123" +uuid = "d1454406-59df-5ea1-beac-c340f2130bc3" +version = "1.1.4+4" + +[[deps.Xorg_libXrandr_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll"] +git-tree-sha1 = "34cea83cb726fb58f325887bf0612c6b3fb17631" +uuid = "ec84b674-ba8e-5d96-8ba1-2a689ba10484" +version = "1.5.2+4" + +[[deps.Xorg_libXrender_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "19560f30fd49f4d4efbe7002a1037f8c43d43b96" +uuid = "ea2f1a96-1ddc-540d-b46f-429655e07cfa" +version = "0.9.10+4" + +[[deps.Xorg_libpthread_stubs_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "8fdda4c692503d44d04a0603d9ac0982054635f9" +uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74" +version = "0.1.1+0" + +[[deps.Xorg_libxcb_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"] +git-tree-sha1 = "b4bfde5d5b652e22b9c790ad00af08b6d042b97d" +uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b" +version = "1.15.0+0" + +[[deps.Xorg_libxkbfile_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libX11_jll"] +git-tree-sha1 = "730eeca102434283c50ccf7d1ecdadf521a765a4" +uuid = "cc61e674-0454-545c-8b26-ed2c68acab7a" +version = "1.1.2+0" + +[[deps.Xorg_xcb_util_cursor_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_jll", "Xorg_xcb_util_renderutil_jll"] +git-tree-sha1 = "04341cb870f29dcd5e39055f895c39d016e18ccd" +uuid = "e920d4aa-a673-5f3a-b3d7-f755a4d47c43" +version = "0.1.4+0" + 
+[[deps.Xorg_xcb_util_image_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "0fab0a40349ba1cba2c1da699243396ff8e94b97" +uuid = "12413925-8142-5f55-bb0e-6d7ca50bb09b" +version = "0.4.0+1" + +[[deps.Xorg_xcb_util_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll"] +git-tree-sha1 = "e7fd7b2881fa2eaa72717420894d3938177862d1" +uuid = "2def613f-5ad1-5310-b15b-b15d46f528f5" +version = "0.4.0+1" + +[[deps.Xorg_xcb_util_keysyms_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "d1151e2c45a544f32441a567d1690e701ec89b00" +uuid = "975044d2-76e6-5fbe-bf08-97ce7c6574c7" +version = "0.4.0+1" + +[[deps.Xorg_xcb_util_renderutil_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "dfd7a8f38d4613b6a575253b3174dd991ca6183e" +uuid = "0d47668e-0667-5a69-a72c-f761630bfb7e" +version = "0.3.9+1" + +[[deps.Xorg_xcb_util_wm_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "e78d10aab01a4a154142c5006ed44fd9e8e31b67" +uuid = "c22f9ab0-d5fe-5066-847c-f4bb1cd4e361" +version = "0.4.1+1" + +[[deps.Xorg_xkbcomp_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_libxkbfile_jll"] +git-tree-sha1 = "330f955bc41bb8f5270a369c473fc4a5a4e4d3cb" +uuid = "35661453-b289-5fab-8a00-3d9160c6a3a4" +version = "1.4.6+0" + +[[deps.Xorg_xkeyboard_config_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Xorg_xkbcomp_jll"] +git-tree-sha1 = "691634e5453ad362044e2ad653e79f3ee3bb98c3" +uuid = "33bec58e-1273-512f-9401-5d533626f822" +version = "2.39.0+0" + +[[deps.Xorg_xtrans_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "e92a1a012a10506618f10b7047e478403a046c77" +uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10" +version = "1.5.0+0" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+0" + +[[deps.Zstd_jll]] +deps = ["Artifacts", 
"JLLWrappers", "Libdl"] +git-tree-sha1 = "49ce682769cd5de6c72dcf1b94ed7790cd08974c" +uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" +version = "1.5.5+0" + +[[deps.eudev_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "gperf_jll"] +git-tree-sha1 = "431b678a28ebb559d224c0b6b6d01afce87c51ba" +uuid = "35ca27e7-8b34-5b7f-bca9-bdc33f59eb06" +version = "3.2.9+0" + +[[deps.fzf_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "47cf33e62e138b920039e8ff9f9841aafe1b733e" +uuid = "214eeab7-80f7-51ab-84ad-2988db7cef09" +version = "0.35.1+0" + +[[deps.gperf_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "3516a5630f741c9eecb3720b1ec9d8edc3ecc033" +uuid = "1a1c6b14-54f6-533d-8383-74cd7377aa70" +version = "3.1.1+0" + +[[deps.isoband_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "51b5eeb3f98367157a7a12a1fb0aa5328946c03c" +uuid = "9a68df92-36a6-505f-a73e-abb412b6bfb4" +version = "0.2.3+0" + +[[deps.libaom_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "3a2ea60308f0996d26f1e5354e10c24e9ef905d4" +uuid = "a4ae2306-e953-59d6-aa16-d00cac43593b" +version = "3.4.0+0" + +[[deps.libass_jll]] +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "HarfBuzz_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "5982a94fcba20f02f42ace44b9894ee2b140fe47" +uuid = "0ac62f75-1d6f-5e53-bd7c-93b484bb37c0" +version = "0.15.1+0" + +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.8.0+0" + +[[deps.libevdev_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "141fe65dc3efabb0b1d5ba74e91f6ad26f84cc22" +uuid = "2db6ffa8-e38f-5e21-84af-90c45d0032cc" +version = "1.11.0+0" + +[[deps.libfdk_aac_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "daacc84a041563f965be61859a36e17c4e4fcd55" +uuid = "f638f0a6-7fb0-5443-88ba-1cc74229b280" +version = 
"2.0.2+0" + +[[deps.libinput_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "eudev_jll", "libevdev_jll", "mtdev_jll"] +git-tree-sha1 = "ad50e5b90f222cfe78aa3d5183a20a12de1322ce" +uuid = "36db933b-70db-51c0-b978-0f229ee0e533" +version = "1.18.0+0" + +[[deps.libpng_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "94d180a6d2b5e55e447e2d27a29ed04fe79eb30c" +uuid = "b53b4c65-9356-5827-b1ea-8c7a1a84506f" +version = "1.6.38+0" + +[[deps.libsixel_jll]] +deps = ["Artifacts", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Pkg", "libpng_jll"] +git-tree-sha1 = "d4f63314c8aa1e48cd22aa0c17ed76cd1ae48c3c" +uuid = "075b6546-f08a-558a-be8f-8157d0f608a5" +version = "1.10.3+0" + +[[deps.libvorbis_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Ogg_jll", "Pkg"] +git-tree-sha1 = "b910cb81ef3fe6e78bf6acee440bda86fd6ae00c" +uuid = "f27f6e37-5d2b-51aa-960f-b287f2bc3b7a" +version = "1.3.7+1" + +[[deps.mtdev_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "814e154bdb7be91d78b6802843f76b6ece642f11" +uuid = "009596ad-96f7-51b1-9f1b-5ce2d5e8a71e" +version = "1.1.6+0" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.48.0+0" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+0" + +[[deps.x264_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "4fea590b89e6ec504593146bf8b988b2c00922b2" +uuid = "1270edf5-f2f9-52d2-97e9-ab00b5d0237a" +version = "2021.5.5+0" + +[[deps.x265_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ee567a171cce03570d77ad3a43e90218e38937a9" +uuid = "dfaa095f-4041-5dcd-9319-2fabd8486b76" +version = "3.5.0+0" + +[[deps.xkbcommon_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Wayland_jll", "Wayland_protocols_jll", "Xorg_libxcb_jll", "Xorg_xkeyboard_config_jll"] +git-tree-sha1 = 
"9c304562909ab2bab0262639bd4f444d7bc2be37" +uuid = "d8fb68d0-12a3-5cfd-a85a-d49703b185fd" +version = "1.4.1+1" diff --git a/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/Project.toml b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/Project.toml new file mode 100644 index 00000000..8921d1d0 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/Project.toml @@ -0,0 +1,5 @@ +[deps] +CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" +LoopVectorization = "bdcacae8-1622-11e9-2a5c-532679323890" +Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" diff --git a/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/bench.png b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/bench.png new file mode 100644 index 00000000..674e57d2 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/bench.png differ diff --git a/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/jax.txt b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/jax.txt new file mode 100644 index 00000000..07aca7d9 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/jax.txt @@ -0,0 +1,7 @@ +1.668930053710937500e+00 +1.907348632812500000e+00 +1.668930053710937500e+00 +4.053115844726562500e+00 +2.574920654296875000e+01 +3.848075866699218750e+02 +5.556106567382812500e+03 diff --git a/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/julia-f.txt b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/julia-f.txt new file mode 100644 index 00000000..af96cd38 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/julia-f.txt @@ -0,0 +1,7 @@ +0.024430722891566267 +0.053594923857868015 +0.38354000000000005 +3.307375 +32.25 +335.417 +8106.541 diff --git a/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/julia-f_tturbo.txt b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/julia-f_tturbo.txt new file mode 100644 index 00000000..c77b004b 
--- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/julia-f_tturbo.txt @@ -0,0 +1,7 @@ +0.07714845360824742 +1.1203333333333332 +1.8657777777777778 +2.5885 +17.75 +280.792 +4118.792 diff --git a/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/julia-f_turbo.txt b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/julia-f_turbo.txt new file mode 100644 index 00000000..01677a4c --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/julia-f_turbo.txt @@ -0,0 +1,7 @@ +0.07271325796505652 +0.08990700104493207 +0.31024583333333333 +2.0972222222222223 +19.667 +200.0 +7299.167 diff --git a/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/numpy.txt b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/numpy.txt new file mode 100644 index 00000000..e3144a26 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/numpy.txt @@ -0,0 +1,7 @@ +3.099441528320312500e+00 +3.814697265625000000e+00 +1.263618469238281250e+01 +9.298324584960937500e+01 +8.859634399414062500e+02 +1.085591316223144531e+04 +1.071548461914062500e+05 diff --git a/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/poly-julia.jl b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/poly-julia.jl new file mode 100644 index 00000000..687d472b --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/poly-julia.jl @@ -0,0 +1,65 @@ +using BenchmarkTools +using LoopVectorization +using CairoMakie +using DelimitedFiles + +# !!! RUN THIS WITH: julia -t 4 +RERUN = false + +function f(x) + @. 3*x^3 + 2*x^2 + x + 1 +end + +function f_turbo(x) + @turbo @. 3*x^3 + 2*x^2 + x + 1 +end + +function f_tturbo(x) + @tturbo @. 
3*x^3 + 2*x^2 + x + 1 +end + +sizes = [10, 100, 1_000, 10_000, 100_000, 1_000_000, 10_000_000] +funcs = [f, f_turbo, f_tturbo] + +if RERUN + times = map(funcs) do fn + ts = map(sizes) do n + x = rand(n) + r = @benchmark $fn($x) + t = minimum(r.times) ./ 10^3 + @info "Benchmarking" fn n t + t + end + writedlm("julia-$(nameof(fn)).txt", ts) + nameof(fn) => ts + end |> Dict +else + times = Dict(nameof(fn)=>readdlm("julia-$(nameof(fn)).txt", Float64)|>vec for fn in funcs) +end + +fig = Figure() +ax = Axis( + fig[1,1], + title=L"f(x) = 3x^3 + 2x^2 + x + 1", + ylabel=L"\mu s", xlabel=L"$n$ where $[x = \text{rand}(n)]$", + xticks=sizes, yticks=[0.1,1,10,100,1000,10000], + xscale=log10, yscale=log10, +) + +lines!(ax, sizes, times[:f], linewidth=2) +scatter!(ax, sizes, times[:f], label="Julia", marker=:circle) + +lines!(ax, sizes, times[:f_turbo], linewidth=2) +scatter!(ax, sizes, times[:f_turbo], label="@turbo", marker=:rect) + +lines!(ax, sizes, times[:f_tturbo], linewidth=2) +scatter!(ax, sizes, times[:f_tturbo], label="@tturbo", marker=:star5) + +lines!(ax, sizes, readdlm("numpy.txt", Float64)|>vec, linewidth=2) +scatter!(ax, sizes, readdlm("numpy.txt", Float64)|>vec, label="Numpy", marker=:diamond) + +lines!(ax, sizes, readdlm("jax.txt", Float64)|>vec, linewidth=2) +scatter!(ax, sizes, readdlm("jax.txt", Float64)|>vec, label="JAX", marker=:hexagon) + +axislegend(position=:lt) +save("bench.png", fig) diff --git a/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/poly-npjax.py b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/poly-npjax.py new file mode 100644 index 00000000..cb1863ee --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/jax-numpy-julia/poly-npjax.py @@ -0,0 +1,73 @@ +# from timeit import timeit +import time +import numpy as np +from numpy.linalg import matrix_power as mpow +import jax +from jax import random +from jax import jit +import jax.numpy as jnp +jax.config.update("jax_enable_x64", True) + +key = random.PRNGKey(758493) + +def 
timeit(f, *args, number=100): + # compile + f(*args) + times = [] + for _ in range(number): + t1 = time.time() + f(*args) + t2 = time.time() + times.append(t2-t1) + return np.min(times), np.std(times) + + +def run_and_save(f, argss: list[tuple], filename: str): + ms = [] + for args in argss: + (m,s) = timeit(f, *args) + ms.append(m) + np.savetxt(filename, np.array(ms)*10**6) + + +@jit +def f(x): + return 3*x**3 + 2*x**2 + x + 1 + +def g(x): + return 3*x**3 + 2*x**2 + x + 1 + + +run_and_save(g, [ + (np.random.rand(10),), + (np.random.rand(100),), + (np.random.rand(1000),), + (np.random.rand(10000),), + (np.random.rand(100000),), + (np.random.rand(1000000),), + (np.random.rand(10000000),), +], "numpy.txt") + +run_and_save(f, [ + (random.uniform(key, shape=(10,), dtype=jnp.float64),), + (random.uniform(key, shape=(100,), dtype=jnp.float64),), + (random.uniform(key, shape=(1000,), dtype=jnp.float64),), + (random.uniform(key, shape=(10000,), dtype=jnp.float64),), + (random.uniform(key, shape=(100000,), dtype=jnp.float64),), + (random.uniform(key, shape=(1000000,), dtype=jnp.float64),), + (random.uniform(key, shape=(10000000,), dtype=jnp.float64),), +], "jax.txt") + +# n = 1000 +# xnp = np.random.rand(n) +# xjx = random.uniform(key, shape=(n,)) +# +# f(xjx) +# +# a = 10**6 +# m, s = timeit(g, xnp) +# print(f"Numpy {m*a:.3f} μs") +# +# m, s = timeit(f, xjx) +# print(f"JAX {m*a:.3f} μs") + diff --git a/docs_vitepress/src/lectures/lecture_05/lab.md b/docs_vitepress/src/lectures/lecture_05/lab.md new file mode 100644 index 00000000..636b4b15 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/lab.md @@ -0,0 +1,526 @@ +# [Lab 05: Practical performance debugging tools](@id perf_lab) + +Performance is crucial in scientific computing. There is a big difference if your experiments run one minute or one hour. 
We have already developed quite a bit of code, both in and outside packages, on which we are going to present some of the tooling that Julia provides for finding performance bottlenecks. Performance of your code or more precisely the speed of execution is of course relative (preference, expectation, existing code) and it's hard to find the exact threshold when we should start to care about it. When starting out with Julia, we recommend not to get bogged down by the performance side of things straightaway, but just design the code in the way that feels natural to you. As opposed to other languages Julia offers you to write the things "like you are used to" (depending on your background), e.g. for cycles are as fast as in C; vectorization of mathematical operators works the same or even better than in MATLAB, NumPy. + +Once you have tested the functionality, you can start exploring the performance of your code by different means: + +- manual code inspection - identifying performance gotchas (tedious, requires skill) +- automatic code inspection - `Jet.jl` (probably not as powerful as in statically typed languages) +- benchmarking - measuring variability in execution time, comparing with some baseline (only a statistic, non-specific) +- profiling - measuring the execution time at "each line of code" (no easy way to handle advanced parallelism, ...) +- allocation tracking - similar to profiling but specifically looking at allocations (one sided statistic) + +## Checking type stability + +Recall that type stable function is written in a way, that allows Julia's compiler to infer all the types of all the variables and produce an efficient native code implementation without the need of boxing some variables in a structure whose types is known only during runtime. 
Probably unbeknown to you we have already seen an example of type unstable function (at least in some situations) in the first lab, where we have defined the `polynomial` function: + +```julia +function polynomial(a, x) + accumulator = 0 + for i in length(a):-1:1 + accumulator += x^(i-1) * a[i] # ! 1-based indexing for arrays + end + return accumulator +end +``` + +The exact form of compiled code and also the type stability depends on the arguments of the function. Let's explore the following two examples of calling the function: + +- Integer number valued arguments + + ```julia + a = [-19, 7, -4, 6] + x = 3 + + polynomial(a, x) + ``` + +- Float number valued arguments + + ```julia + xf = 3.0 + + polynomial(a, xf) + ``` + +The result they produce is the "same" numerically, however it differs in the output type. Though you have probably not noticed it, there should be a difference in runtime (assuming that you have run it once more after its compilation). It is probably a surprise to no one, that one of the methods that has been compiled is type unstable. This can be check with the `@code_warntype` macro: + +```julia +using InteractiveUtils #hide + +@code_warntype polynomial(a, x) # type stable +@code_warntype polynomial(a, xf) # type unstable +``` + +We are getting a little ahead of ourselves in this lab, as understanding of these expressions is part of the future lecture. Anyway the output basically shows what the compiler thinks of each variable in the code, albeit for us in less readable form than the original code. The more red the color is of the type info the less sure the inferred type is. Our main focus should be on the return type of the function which is just at the start of the code with the keyword `Body`. In the first case the return type is an `Int64`, whereas in the second example the compiler is unsure whether the type is `Float64` or `Int64`, marked as the `Union` type of the two. 
Ask for help on the `one` and `zero` functions, which are often used as a shorthand for these kinds of functions.
fortunately for such cases more advanced tools such as [`Cthulhu.jl`](https://github.com/JuliaDebug/Cthulhu.jl) or [`JET.jl`](https://github.com/aviatesk/JET.jl) have been developed.
The most commonly used interface of `BenchmarkTools` is the `@btime` macro, which returns an output similar to the regular `@time` macro, however now aggregated over samples by taking their minimum (a robust estimator for the location parameter of the time distribution - it should not be considered an outlier, because noise from other processes/tasks usually pushes results to the other tail of the distribution, and miraculous noisy speedups are uncommon).
There are some further caveats specific to Julia:
This is due to the fact that our `polynomial` function takes only a few nanoseconds to run. When we want to profile something that takes only a few nanoseconds, we have to execute the function repeatedly.
+ +::: + +Other options for viewing profiler outputs + +- [ProfileView](https://github.com/timholy/ProfileView.jl) - close cousin of `ProfileSVG`, spawns GTK window with interactive FlameGraph +- [VSCode](https://www.julia-vscode.org/docs/stable/release-notes/v0_17/#Profile-viewing-support-1) - always imported `@profview` macro, flamegraphs (js extension required), filtering, one click access to source code +- [PProf](https://github.com/vchuravy/PProf.jl) - serializes the profiler output to protobuffer and loads it in `pprof` web app, graph visualization of stacktraces + +## [Applying fixes](@id horner) + +We have noticed that no matter if the function is type stable or unstable the majority of the computation falls onto the power function `^` and there is a way to solve this using a clever technique called Horner schema[^1], which uses distributive and associative rules to convert the sum of powers into an incremental multiplication of partial results. + +::: warning Exercise + +Rewrite the `polynomial` function using the Horner schema/method[^1]. Moreover include the type stability fixes from `polynomial_stable` You should get more than 3x speedup when measured against the old implementation (measure `polynomial` against `polynomial_stable`. + +**BONUS**: Profile the new method and compare the differences in traces. + +[^1]: Explanation of the Horner schema can be found on [https://en.wikipedia.org/wiki/Horner%27s\_method](https://en.wikipedia.org/wiki/Horner%27s_method). + +::: + +--- + +### Where to find source code? + +As most of Julia is written in Julia itself it is sometimes helpful to look inside for some details or inspiration. 
which runs the code repeatedly for up to `1s`, where each of the `10` samples in the trial is composed of `100` evaluations.
Setting up these parameters ourselves creates a more controlled environment in which performance regressions can be more easily identified. + +Another axis of customization is needed when we are benchmarking mutable operations such as `sort!`, which sorts an array in-place. One way of achieving a consistent benchmark is by omitting the interpolation such as + +```julia +julia> @benchmark sort!(rand(1000)) +BenchmarkTools.Trial: 10000 samples with 1 evaluation. + Range (min … max): 27.250 μs … 95.958 μs ┊ GC (min … max): 0.00% … 0.00% + Time (median): 29.875 μs ┊ GC (median): 0.00% + Time (mean ± σ): 30.340 μs ± 2.678 μs ┊ GC (mean ± σ): 0.00% ± 0.00% + + ▃▇█▄▇▄ + ▁▁▁▂▃▆█████████▆▅▃▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▂ + 27.2 μs Histogram: frequency by time 41.3 μs < + + Memory estimate: 7.94 KiB, allocs estimate: 1. +``` + +however now we are again measuring the data generation as well. A better way of doing such timing is using the built in `setup` keyword, into which you can put a code that has to be run before each sample and which won't be measured. + +```julia +julia> @benchmark sort!(y) setup=(y=rand(1000)) +BenchmarkTools.Trial: 10000 samples with 7 evaluations. + Range (min … max): 7.411 μs … 25.869 μs ┊ GC (min … max): 0.00% … 0.00% + Time (median): 7.696 μs ┊ GC (median): 0.00% + Time (mean ± σ): 7.729 μs ± 305.383 ns ┊ GC (mean ± σ): 0.00% ± 0.00% + + ▂▄▅▆█▇▇▆▄▃ + ▁▁▁▁▂▂▃▄▅▆████████████▆▅▃▂▂▂▁▁▁▁▁▁▁▁▁▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▃ + 7.41 μs Histogram: frequency by time 8.45 μs < + + Memory estimate: 0 bytes, allocs estimate: 0. +``` + +## Ecosystem debugging + +Let's now apply what we have learned so far on the much bigger codebase of our +`Ecosystem`. 
+ +```julia +include("ecosystems/lab04/Ecosystem.jl") + +function make_counter() + n = 0 + counter() = n += 1 +end + +function create_world() + n_grass = 1_000 + n_sheep = 40 + n_wolves = 4 + + nextid = make_counter() + + World(vcat( + [Grass(nextid()) for _ in 1:n_grass], + [Sheep(nextid()) for _ in 1:n_sheep], + [Wolf(nextid()) for _ in 1:n_wolves], + )) +end +world = create_world(); +nothing # hide +``` + +::: warning Exercise + +Use `@profview` and `@code_warntype` to find the type unstable and slow parts of +our simulation. + +Precompile everything by running one step of our simulation and run the profiler +like this: + +```julia +world_step!(world) +@profview for i=1:100 world_step!(world) end +``` + +You should get a flamegraph similar to the one below: +![lab04-ecosystem](ecosystems/lab04-worldstep.png) + +::: + +## Different `Ecosystem.jl` versions + +In order to fix the type instability in the `Vector{Agent}` we somehow have to +rethink our world such that we get a vector of a concrete type. Optimally we would have one +vector for each type of agent that populates our world. Before we completely +redesign how our world works we can try a simple hack that might already improve +things. Instead of letting julia figure our which types of agents we have (which +could be infinitely many), we can tell the compiler at least that we have only +three of them: `Wolf`, `Sheep`, and `Grass`. + +We can do this with a tiny change in the constructor of our `World`: + +```julia +function World(agents::Vector{<:Agent}) + ids = [a.id for a in agents] + length(unique(ids)) == length(agents) || error("Not all agents have unique IDs!") + + # construct Dict{Int,Union{Animal{Wolf}, Animal{Sheep}, Plant{Grass}}} + # instead of Dict{Int,Agent} + types = unique(typeof.(agents)) + dict = Dict{Int,Union{types...}}(a.id => a for a in agents) + + World(dict, maximum(ids)) +end +``` + +::: warning Exercise + +1. 
Run the benchmark script provided [here](ecosystems/lab04/bench.jl) to get + timings for `find_food` and `reproduce!` for the original ecosystem. +2. Run the same benchmark with the modified `World` constructor. + +Which differences can you observe? Why is one version faster than the other? + +::: + +--- + +Julia still has to perform runtime dispatch on the small `Union` of `Agent`s that is in our dictionary. +To avoid this we could create a world that - instead of one plain dictionary - works with a tuple of dictionaries +with one entry for each type of agent. Our world would then look like this: + +```julia +# pseudocode: +world ≈ ( + :Grass => Dict{Int, Plant{Grass}}(...), + :Sheep => Dict{Int, Animal{Sheep}}(...), + :Wolf => Dict{Int, Animal{Wolf}}(...) +) +``` + +In order to make this work we have to touch our ecosystem code in a number of +places, mostly related to `find_food` and `reproduce!`. You can find a working +version of the ecosystem with a world based on `NamedTuple`s +[here](ecosystems/animal_S_world_NamedTupleDict/Ecosystem.jl). +With this slightly more involved update we can gain another bit of speed: + +| | `find_food` | `reproduce!` | +|-------------------------------------------|-------------|--------------| +|`Animal{A}` & `Dict{Int,Agent}` | 43.917 μs | 439.666 μs | +|`Animal{A}` & `Dict{Int,Union{...}}` | 12.208 μs | 340.041 μs | +|`Animal{A}` & `NamedTuple{Dict,...}` | 8.639 μs | 273.103 μs | + +And type stable code! 
+ +```@setup namedtuple +include("ecosystems/animal_S_world_NamedTupleDict/Ecosystem.jl") + +function make_counter() + n = 0 + counter() = n += 1 +end + +function create_world() + n_grass = 1_000 + n_sheep = 40 + n_wolves = 4 + + nextid = make_counter() + + World(vcat( + [Grass(nextid()) for _ in 1:n_grass], + [Sheep(nextid()) for _ in 1:n_sheep], + [Wolf(nextid()) for _ in 1:n_wolves], + )) +end +world = create_world(); +``` + +```@example namedtuple +using InteractiveUtils # hide + +w = Wolf(4000) + +find_food(w, world) +@code_warntype find_food(w, world) +``` + +--- + +The last optimization we can do is to move the `Sex` of our animals from a field +into a parametric type. Our world would then look like below: + +```julia +# pseudocode: +world ≈ ( + :Grass => Dict{Int, Plant{Grass}}(...), + :SheepFemale => Dict{Int, Animal{Sheep,Female}}(...), + :SheepMale => Dict{Int, Animal{Sheep,Male}}(...), + :WolfFemale => Dict{Int, Animal{Wolf,Female}}(...) + :WolfMale => Dict{Int, Animal{Wolf,Male}}(...) +) +``` + +This should give us a lot of speedup in the `reproduce!` function, because we +will not have to `filter` for the correct sex anymore, but instead can just pick +the `NamedTuple` that is associated with the correct type of mate. +Unfortunately, changing the type signature of `Animal` essentially means that we +have to touch every line of code of our original ecosystem. 
Julia does not have native support, but offers solutions through libraries like [SumTypes.jl](https://github.com/JuliaDynamics/LightSumTypes.jl), [Unityper.jl](https://github.com/YingboMa/Unityper.jl) or [Moshi](https://rogerluo.dev/Moshi.jl/).
def f(x):
It frequently happens that Julia newbies ask on the forum why their code in Julia is slow in comparison to the same code in Python (numpy).
Profile.init(; n = 989680, delay = 0.001)
and it potentially stresses the garbage collector.
for _ in 1:repetitions
What does the profiler tell us now (click [here](profiles/profile2.html) to see the output)?
we see [here](profiles/profile3.html) that there is very little we can do now. Maybe remove bounds checks (more on this later) and make the code a bit nicer.
+``` + +We can see that it is six-times faster than ours while also being much nicer to read and +having almost no allocations. Where is the catch? +It uses `StaticArrays` which offers linear algebra primitices performant for vectors and matrices of small size. They are allocated on stack, therefore there is no pressure of GarbageCollector and the type is specialized on size of matrices (unlike regular matrices) works on arrays of an sizes. This allows the compiler to perform further optimizations like unrolling loops, etc. + +What we have learned so far? + +- Profiler is extremely useful in identifying functions, where your code spends most time. +- Memory allocation (on heap to be specific) can be very bad for the performance. We can generally avoided by pre-allocation (if possible) or allocating on the stack (Julia offers increasingly larger number of primitives for hits. We have already seen StaticArrays, DataFrames now offers for example String3, String7, String15, String31). +- Benchmarking is useful for comparison of solutions + +## Replacing deep copies with shallow copies (use view if possible) + +Let's look at the following function computing mean of a columns + +```julia +function cmean(x::AbstractMatrix{T}) where {T} + o = zeros(T, size(x,1)) + for i in axes(x, 2) + o .+= x[:,i] # line 4 + end + n = size(x, 2) + n > 0 ? o ./ n : o +end + +x = randn(2, 10000) +``` + +```julia +@benchmark cmean(x) +BenchmarkTools.Trial: 10000 samples with 1 evaluation. + Range (min … max): 371.018 μs … 3.291 ms ┊ GC (min … max): 0.00% … 83.30% + Time (median): 419.182 μs ┊ GC (median): 0.00% + Time (mean ± σ): 482.785 μs ± 331.939 μs ┊ GC (mean ± σ): 9.91% ± 12.02% + + ▃█▄▃▃▂▁ ▁ + ████████▇▆▅▃▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▇██ █ + 371 μs Histogram: log(frequency) by time 2.65 ms < + + Memory estimate: 937.59 KiB, allocs estimate: 10001. +``` + +What we see that function is performing more than 10000 allocations. 
They come from `x[:,i]` which allocates a new memory and copies the content. In this case, this is completely unnecessary, as the content of the array `x` is never modified. We can avoid it by creating a `view` into an `x`, which you can imagine as a pointer to `x` which automatically adjust the bounds. Views can be constructed either using a function call `view(x, axes...)` or using a convenience macro `@view ` which turns the usual notation `x[...]` to `view(x, ...)` + +```julia +function view_cmean(x::AbstractMatrix{T}) where {T} + o = zeros(T, size(x,1)) + for i in axes(x, 2) + o .+= @view x[:,i] + end + n = size(x,2) + n > 0 ? o ./ n : o +end +``` + +We obtain instantly a 10-fold speedup + +```julia +julia> @benchmark view_cmean(x) +BenchmarkTools.Trial: 10000 samples with 1 evaluation. + Range (min … max): 36.802 μs … 166.260 μs ┊ GC (min … max): 0.00% … 0.00% + Time (median): 41.676 μs ┊ GC (median): 0.00% + Time (mean ± σ): 42.936 μs ± 9.921 μs ┊ GC (mean ± σ): 0.00% ± 0.00% + + ▂ █▆█▆▂ ▁▁ ▁ ▁ ▂ + █▄█████████▇▅██▆█▆██▆▆▇▆▆▆▆▇▆▅▆▆▅▅▁▅▅▆▇▆▆▆▆▄▃▆▆▆▄▆▄▅▅▄▆▅▆▅▄▆ █ + 36.8 μs Histogram: log(frequency) by time 97.8 μs < + + Memory estimate: 96 bytes, allocs estimate: 1. +``` + +## Traverse arrays in the right order + +Let's now compute rowmean using the function similar to `cmean` and since we have learnt from the above, we use the `view` to have non-allocating version + +```julia +function rmean(x::AbstractMatrix{T}) where {T} + o = zeros(T, size(x,2)) + for i in axes(x, 1) + o .+= @view x[i,:] + end + n = size(x,1) + n > 0 ? o ./ n : o +end +x = randn(10000, 2) +``` + +```julia +x = randn(10000, 2) +@benchmark rmean(x) +BenchmarkTools.Trial: 10000 samples with 1 evaluation. 
+ Range (min … max): 44.165 μs … 194.395 μs ┊ GC (min … max): 0.00% … 0.00% + Time (median): 46.654 μs ┊ GC (median): 0.00% + Time (mean ± σ): 48.544 μs ± 10.940 μs ┊ GC (mean ± σ): 0.00% ± 0.00% + + █▆█▇▄▁ ▁ ▂ + ██████▇▇▇▇▆▇▅██▇█▇█▇▆▅▄▄▅▅▄▄▄▄▂▄▅▆▅▅▅▆▅▅▅▆▄▆▄▄▅▅▄▅▄▄▅▅▅▅▄▄▃▅ █ + 44.2 μs Histogram: log(frequency) by time 108 μs < + + Memory estimate: 192 bytes, allocs estimate: 2. +``` + +The above seems OK and the speed is comparable to our tuned `cmean`. But, can we actually do better? We have to realize that when we are accessing slices in the matrix `x`, they are not aligned in the memory. Recall that Julia is column major (like Fortran and unlike C and Python), which means that consecutive arrays of memory are along columns. i.e for a matrix with n rows and m columns they are aligned as + +``` +1 | n + 1 | 2n + 1 | ⋯ | (m-1)n + 1 +2 | n + 2 | 2n + 2 | ⋯ | (m-1)n + 2 +3 | n + 3 | 2n + 3 | ⋯ | (m-1)n + 3 +⋮ | ⋮ | ⋮ | ⋯ | ⋮ +n | 2n | 3n | ⋯ | mn +``` + +accessing non-consecutively is really bad for cache, as we have to load the memory into a cache line and use a single entry (in case of Float64 it is 8 bytes) out of it, discard it and load another one. If cache line has length 32 bytes, then we are wasting remaining 24 bytes. Therefore, we rewrite `rmean` to access the memory in consecutive blocks as follows, where we essentially sum the matrix column by columns. + +```julia +function aligned_rmean(x::AbstractMatrix{T}) where {T} + o = zeros(T, size(x,2)) + for i in axes(x, 2) + o[i] = sum(@view x[:, i]) + end + n = size(x, 1) + n > 0 ? o ./ n : o +end + +aligned_rmean(x) ≈ rmean(x) +``` + +```julia +julia> @benchmark aligned_rmean(x) +BenchmarkTools.Trial: 10000 samples with 10 evaluations. 
+ Range (min … max): 1.988 μs … 11.797 μs ┊ GC (min … max): 0.00% … 0.00% + Time (median): 2.041 μs ┊ GC (median): 0.00% + Time (mean ± σ): 2.167 μs ± 568.616 ns ┊ GC (mean ± σ): 0.00% ± 0.00% + + █▇▄▂▂▁▁ ▁ ▂▁ ▂ + ██████████████▅▅▃▁▁▁▁▁▄▅▄▁▅▆▆▆▇▇▆▆▆▆▅▃▅▅▄▅▅▄▄▄▃▃▁▁▁▄▁▁▄▃▄▃▆ █ + 1.99 μs Histogram: log(frequency) by time 5.57 μs < + + Memory estimate: 192 bytes, allocs estimate: 2. +``` + +Running the benchmark shows that we have about 20x speedup and we are on par with Julia's built-in functions. + +::: tip + +Remark tempting it might be, there is actually nothing we can do to speed-up the `cmean` function. This trouble is inherent to the processor design and you should be careful how you align things in the memory, such that it is performant in your project + +::: + +Detecting this type of inefficiencies is generally difficult, and requires processor assisted measurement. `LIKWID.jl` is a wrapper for a LIKWID library providing various processor level statistics, like throughput, cache misses + +## Type stability + +Sometimes it happens that we create a non-stable code, which might be difficult to spot at first, for a non-trained eye. A prototypical example of such bug is as follows + +```julia +function poor_sum(x) + s = 0 + for xᵢ in x + s += xᵢ + end + s +end +``` + +```julia +x = randn(10^8); +julia> @benchmark poor_sum(x) +BenchmarkTools.Trial: 23 samples with 1 evaluation. + Range (min … max): 222.055 ms … 233.552 ms ┊ GC (min … max): 0.00% … 0.00% + Time (median): 225.259 ms ┊ GC (median): 0.00% + Time (mean ± σ): 225.906 ms ± 3.016 ms ┊ GC (mean ± σ): 0.00% ± 0.00% + + ▁ ▁ ▁▁█ ▁▁ ▁ ▁█ ▁ ▁ ▁ ▁ ▁ ▁▁▁▁ ▁ ▁ + █▁█▁███▁▁██▁▁█▁██▁█▁█▁█▁█▁█▁▁▁▁████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁█ ▁ + 222 ms Histogram: frequency by time 234 ms < + + Memory estimate: 16 bytes, allocs estimate: 1. +``` + +Can we do better? Let's look what profiler says. 
+ +```julia +using ProfileCanvas + +prof = @profview poor_sum(x) +ProfileCanvas.html_file("profiles/profile4.html", prof) +``` + +The profiler (output [here](profiles/profile4.html)) does not show any red, which means that according to the profilerthe code is type stable (and so does the `@code_typed poor_sum(x)` does not show anything bad.) Yet, we can see that the fourth line of the `poor_sum` function takes unusually long (there is a white area above, which means that the time spend in childs of that line (iteration and sum) does the sum to the time spent in the line, which is fishy). + +A close lookup on the code reveals that `s` is initialized as `Int64`, because `typeof(0)` is `Int64`. But then in the loop, we add to `s` a `Float64` because `x` is `Vector{Float64}`, which means during the execution, the type `s` changes the type. + +So why nor compiler nor `@code_typed(poor_sum(x))` warns us about the type instability? This is because of the optimization called **small unions**, where Julia can optimize "small" type instabilitites (recall the second [lecture](@ref type_lecture)). + +We can fix it for example by initializing `x` to be the zero of an element type of the array `x` (though this solution technically assumes `x` is an array, which means that `poor_sum` will not work for generators) + +```julia +function stable_sum(x) + s = zero(eltype(x)) + for xᵢ in x + s += xᵢ + end + s +end +``` + +But there is no difference, due to small union optimization (the above would kill any performance in older versions.) + +```julia +julia> @benchmark stable_sum(x) +BenchmarkTools.Trial: 42 samples with 1 evaluation. 
+ Range (min … max): 119.491 ms … 123.062 ms ┊ GC (min … max): 0.00% … 0.00% + Time (median): 120.535 ms ┊ GC (median): 0.00% + Time (mean ± σ): 120.687 ms ± 819.740 μs ┊ GC (mean ± σ): 0.00% ± 0.00% + + █ + ▅▁▅▁▅▅██▅▁█▁█▁██▅▁█▅▅▁█▅▁█▁█▅▅▅█▁▁▁▁▁▁▁▅▁▁▁▁▁▅▁▅▁▁▁▁▁▁▅▁▁▁▁▁▅ ▁ + 119 ms Histogram: frequency by time 123 ms < + + Memory estimate: 16 bytes, allocs estimate: 1. +``` + +::: tip + +The optimization of small unions has been added in Julia 1.0. If we compare the of the same function in Julia 0.6, the difference is striking +```julia +julia> @time poor_sum(x) + 1.863665 seconds (300.00 M allocations: 4.470 GiB, 4.29% gc time) +9647.736705951513 +julia> @time stable_sum(x) + 0.167794 seconds (5 allocations: 176 bytes) +9647.736705951513 +``` +The optimization of small unions is a big deal. It simplifies implementation of arrays with missing values, or allows to signal that result has not been produced by returning `missing`. In case of arrays with missing values, the type of element is `Union{Missing,T}` where `T` is the type of non-missing element. + +::: + +We can tell Julia that it is safe to vectorize the code. Julia tries to vectorize anyway, but @simd macro allows more aggressive operations, such as instruction reordering, which might change the output due imprecision of representation of real numbers in Floats. + +```julia +function simd_sum(x) + s = zero(eltype(x)) + @simd for xᵢ in x + s += xᵢ + end + s +end +``` + +```julia +julia> @benchmark simd_sum(x) +BenchmarkTools.Trial: 90 samples with 1 evaluation. + Range (min … max): 50.854 ms … 62.260 ms ┊ GC (min … max): 0.00% … 0.00% + Time (median): 54.656 ms ┊ GC (median): 0.00% + Time (mean ± σ): 55.630 ms ± 3.437 ms ┊ GC (mean ± σ): 0.00% ± 0.00% + + █ ▂ ▄ ▂ ▂ ▂ ▄ + ▄▆█▆▁█▄██▁▁█▆██▆▄█▁▆▄▁▆▆▄▁▁▆▁▁▁▁▄██▁█▁▁█▄▄▆▆▄▄▁▄▁▁▁▄█▁▆▁▆▁▆ ▁ + 50.9 ms Histogram: frequency by time 62.1 ms < + + Memory estimate: 16 bytes, allocs estimate: 1. 
+``` + +## Untyped global variables introduce type instability + +```julia +function implicit_sum() + s = zero(eltype(y)) + @simd for yᵢ in y + s += yᵢ + end + s +end +``` + +```julia +julia> y = randn(10^8); +julia> @benchmark implicit_sum() +BenchmarkTools.Trial: 1 sample with 1 evaluation. + Single result which took 10.837 s (11.34% GC) to evaluate, + with a memory estimate of 8.94 GiB, over 499998980 allocations. +``` + +What? The same function where I made the parameters to be implicit has just turned **nine orders of magnitude** slower? + +Let's look what the profiler says + +```julia +prof = @profview implicit_sum() +ProfileCanvas.html_file("profiles/profile5.html", prof) +``` + +(output available [here](profiles/profile5.html)) which does not say anything except that there is a huge type-instability (red bar). In fact, the whole computation is dominated by Julia constantly determining the type of something. + +How can we determine, where is the type instability? + +- `@code_typed implicit_sum()` is +- `Cthulhu.jl` as `@descend implicit_sum()` +- `JET.jl` as `@report_opt implicit_sum()` + +::: tip JET + +JET is a code analyzer, which analyze the code without actually invoking it. The technique is called "abstract interpretation" and JET internally uses Julia's native type inference implementation, so it can analyze code as fast/correctly as Julia's code generation. JET internally traces the compiler's knowledge about types and detects, where the compiler cannot infer the type (outputs `Any`). Note that small unions are no longer considered type instability, since as we have seen above, the performance bottleneck is small. We can use JET as + +```julia +using JET + +@report_opt implicit_sum() +``` + +::: + +All of these tools tells us that the Julia's compiler cannot determine the type of `x`. But why? I can just invoke `typeof(x)` and I know immediately the type of `x`. + +To understand the problem, you have to think about the compiler. + +1. 
You define function `implicit_sum().` +2. If you call `implicit_sum` and `y` does not exist, Julia will happily crash. +3. If you call `implicit_sum` and `y` exist, the function will give you the result (albeit slowly). At this moment, Julia has to specialize `implicit_sum`. It has two options how to behave with respect to `y`. + + a. The compiler can assume that type of `y` is the current `typeof(y)` but that would mean that if a user redefines `y` and change the type, the specialization of the function `implicit_sum` will assume the wrong type of `y` and it can have unexpected results. + + b. The compiler take safe approach and determine the type of `y` inside the function `implicit_sum` and behave accordingly (recall that julia is dynamically typed). Yet, not knowing the type precisely is absolute disaster for performance. You can see this assumption for yourself by typing `@code_typed implicit_sum()`. + +Notice the compiler dispatches on the name of the function and type of its arguments, hence, the compiler cannot create different versions of `implicit_sum` for different types of `y`, since it is not an argument, hence the dynamic resolution of types `y` inside `implicit_sum` function. + +Julia takes the **safe approach**, which we can verify that although the `implicit_sum` was specialized (compiled) when `y` was `Vector{Float64}`, it works for other types + +```julia +y = rand(Int, 1000) + +implicit_sum() ≈ sum(y) + +y = map(Complex, rand(1000), rand(1000)) + +implicit_sum() ≈ sum(y) + +using SoftPosit + +y = Posit16.(rand(Float64, 1000)) + +implicit_sum() ≈ sum(y) +``` + +This means, using global variables inside functions without passing them as arguments ultimately leads to type-instability. What are the solutions? + +### Julia 1.7 and below => Declaring `y` as const + +We can declare `y` as const, which tells the compiler that `y` will not change (and for the compiler mainly indicates **that type of `y` will not change**). 
+ +Let's see that, but restart the julia before trying. After defining `y` as `const`, we see that the speed is the same as of `simd_sum()`. + +```julia +julia> @benchmark implicit_sum() +BenchmarkTools.Trial: 99 samples with 1 evaluation. + Range (min … max): 47.864 ms … 58.365 ms ┊ GC (min … max): 0.00% … 0.00% + Time (median): 50.042 ms ┊ GC (median): 0.00% + Time (mean ± σ): 50.479 ms ± 1.598 ms ┊ GC (mean ± σ): 0.00% ± 0.00% + + ▂ █▂▂▇ ▅ ▃ + ▃▁▃▁▁▁▁▇██████▅█▆██▇▅▆▁▁▃▅▃▃▁▃▃▁▃▃▁▁▃▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▃▁▁▁▁▁▃ ▁ + 47.9 ms Histogram: frequency by time 57.1 ms < + + Memory estimate: 0 bytes, allocs estimate: 0. +``` + +Also notice the difference in `@code_typed implicit_sum()` + +### Julia 1.8 and above => Provide type to `y` + +Julia 1.8 added support for typed global variables which solves the above problem as can be seen from (do not forget to restart julia) + +```julia +y::Vector{Float64} = rand(10^8); + +@benchmark implicit_sum() +@code_typed implicit_sum() +``` + +Unlike in `const`, we are free to change the bindings if it is possible to convert it to `typeof(y)` + +```julia +y = [1.0,2.0] +typeof(y) + +y = [1,2] +typeof(y) +``` + +but `y = ["1","2"]` will issue an error, since `String` has no default conversion rule to `Float64` (you can overwrite this by defining `Base.convert(::Type{Float64}, s::String) = parse(Float64, s)` but it will likely lead to all kinds of shenanigans). + +### Barier function + +Recall the reason, why the `implicit_sum` is so slow is that every time the function invokes `getindex` and `+`, it has to resolve types. The solution would be to limit the number of resolutions, which can done by passing all parameters to inner function as follows (do not forget to restart julia if you have defined `y` as const before). + +```julia +using BenchmarkTools + +function barrier_sum() + simd_sum(y) +end + +y = randn(10^8); +``` + +```julia +@benchmark barrier_sum() +BenchmarkTools.Trial: 93 samples with 1 evaluation. 
+ Range (min … max): 50.229 ms … 58.484 ms ┊ GC (min … max): 0.00% … 0.00% + Time (median): 53.882 ms ┊ GC (median): 0.00% + Time (mean ± σ): 54.064 ms ± 2.892 ms ┊ GC (mean ± σ): 0.00% ± 0.00% + + ▂▆█ ▆▄ + ▆█████▆▄█▆▄▆▁▄▄▄▄▁▁▄▁▄▄▆▁▄▄▄▁▁▄▁▁▄▁▁▆▆▁▁▄▄▁▄▆████▄▆▄█▆▄▄▄▄█ ▁ + 50.2 ms Histogram: frequency by time 58.4 ms < + + Memory estimate: 16 bytes, allocs estimate: 1. +``` + +```julia +using JET + +@report_opt barrier_sum() +``` + +## Checking bounds is expensive +By default, julia checks bounds on every access to a location on an array, which can be difficult. Let's demonstrate it on the below example of linear attention layer taken from +(Retentive neural networks)[https://arxiv.org/abs/2307.08621] + +```julia +struct DMatrix{T} <:AbstractMatrix{T} + γ::T + size::Int64 +end + +Base.getindex(d::DMatrix{T}, n::Integer, m::Integer) where {T} = n ≥ m ? d.γ^(n-m) : zero(T) +Base.size(d::DMatrix,i::Integer) = d.size +Base.size(d::DMatrix) = (d.size, d.size) + + +function linear_attention_product(Q, K, V, γ::Real) + D = DMatrix(γ, size(Q, 2)); + transpose(((Q' * K) .* D) * V') +end +``` + +The above implementation is great, but is consumes a lot of memory, which is critical asset in training of large models. We therefore implement a variant with a forloops as + +```julia +function linear_attention_forloop(Q, K, V, γ) + o = zeros(eltype(V), size(V)) + for n in axes(Q,2) + γₙ = γ ^ n + for m in 1:n + α = zero(eltype(Q)) + for k in axes(Q,1) + α += Q[k, n] * K[k, m] + end + + γₙ /= γ + for k in axes(V,1) + o[k, n] += γₙ * α * V[k, m] + end + end + end + o +end +``` + +We `@benchmark` to measure to compare the results + +```julia +Q = randn(32, 257) +K = randn(32, 257) +V = randn(32, 257) +γ = 0.99 + +julia> linear_attention_forloop(Q, K, V, γ) ≈ linear_attention_product(Q, K, V, γ) +true + +julia> @benchmark linear_attention_product(Q, K, V, γ) +BenchmarkTools.Trial: 2420 samples with 1 evaluation. 
+ Range (min … max): 328.166 μs … 14.099 ms ┊ GC (min … max): 0.00% … 97.25% + Time (median): 379.333 μs ┊ GC (median): 0.00% + Time (mean ± σ): 412.396 μs ± 391.318 μs ┊ GC (mean ± σ): 7.40% ± 9.40% + + ▂▂█▃▁ + █████▇▇▅▄▁▃▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▁▁▁▁▁▅▃▄▃▄▁▁▃ █ + 328 μs Histogram: log(frequency) by time 1.6 ms < + + Memory estimate: 1.07 MiB, allocs estimate: 10. + +julia> @benchmark linear_attention_forloop(Q, K, V, γ) +BenchmarkTools.Trial: 822 samples with 1 evaluation. + Range (min … max): 1.203 ms … 1.263 ms ┊ GC (min … max): 0.00% … 0.00% + Time (median): 1.213 ms ┊ GC (median): 0.00% + Time (mean ± σ): 1.217 ms ± 10.143 μs ┊ GC (mean ± σ): 0.00% ± 0.00% + + ▂▅▆█▅▇▇▅▃▃ + ▂▃▅▇▆███████████▅▅█▄▅▃▄▄▄▄▅▄▅▃▄▄▄▄▄▃▃▅▄▄▃▅▃▄▃▃▃▄▁▂▂▂▁▂▁▁▂▃ ▄ + 1.2 ms Histogram: frequency by time 1.25 ms < + + Memory estimate: 64.33 KiB, allocs estimate: 3. +``` + +Where we see that the first version is faster, but allocates more memory. The second version is slower, but allocates much less. Julia by default checks bounds in access to arrays to ensure the memory is accessed within bounts. We can ask julia to omit this check using macro `@inbounds` as + +```julia +function linear_attention_inforloop(Q, K, V, γ) + o = zeros(eltype(V), size(V)) + @inbounds for n in axes(Q,2) + γₙ = γ ^ n + for m in 1:n + α = zero(eltype(Q)) + for k in axes(Q,1) + α += Q[k, n] * K[k, m] + end + + γₙ /= γ + for k in axes(V,1) + o[k, n] += γₙ * α * V[k, m] + end + end + end + o +end + +julia> linear_attention_inforloop(Q, K, V, γ) ≈ linear_attention_product(Q, K, V, γ) +true + +julia> @benchmark linear_attention_inforloop(Q, K, V, γ) +BenchmarkTools.Trial: 2440 samples with 1 evaluation. 
+ Range (min … max): 400.166 μs … 1.005 ms ┊ GC (min … max): 0.00% … 0.00% + Time (median): 404.875 μs ┊ GC (median): 0.00% + Time (mean ± σ): 409.178 μs ± 38.104 μs ┊ GC (mean ± σ): 0.00% ± 0.00% + + ▂▅▇██▆▅▅▅▅▅▄▃▂▁ ▁ + ▄▆▄▅████████████████████▆▇▆▇▆▇▅▆▇▇█▇▇█▆▄▅▅▄▁▅▁▁▅▅▄▄▄▁▁▄▁▄▁▁▄ █ + 400 μs Histogram: log(frequency) by time 433 μs < + + Memory estimate: 64.33 KiB, allocs estimate: 3. +``` + +By not checking the bounds, we bring the speed close to the version based on matrix multiplication, while having small memory requirements (further speedup can be achieved using threadding). + +## Boxing in closure + +Recall closure is a function which contains some parameters contained + +An example of closure (adopted from JET.jl) + +```julia +function abmult(r::Int) + if r < 0 + r = -r + end + # the closure assigned to `f` make the variable `r` captured + f = x -> x * r + return f +end; +``` + +Another example of closure counting the error and printing it every `steps` + +```julia +function initcallback(; steps = 10) + i = 0 + ts = time() + y = 0.0 + cby = function evalcb(_y) + i += 1.0 + y += _y + if mod(i, steps) == 0 + l = y / steps + y = 0.0 + println(i, ": loss: ", l," time per step: ",round((time() - ts)/steps, sigdigits = 2)) + ts = time() + end + end + cby +end + +cby = initcallback() + +for i in 1:100 + cby(rand()) +end +``` + +```julia +function simulation() + cby = initcallback(;steps = 10000) #intentionally disable printing + for i in 1:1000 + cby(sin(rand())) + end +end + +@benchmark simulation() +``` + +```julia +using ProfileCanvas +@profview (for i in 1:100; simulation(); end) +``` + +We see a red bars in lines 4 and 8 of evalcb, which indicates the type instability hindering the performance. Why they are there? The answer is tricky. + +In closures, as the name suggest, function *closes over* (or captures) some variables defined in the function outside the function that is returned. 
If these variables are of primitive types (think `Int`, `Float64`, etc.), the compiler assumes that they might be changed. Though when primitive types are used in calculations, the result is not written to the same memory location but to a new location and the name of the variable is made to point to this new variable location (this is called rebinding). We can demonstrate it on this example [^2]. + +[^2]: Invenia blog [entry](https://invenia.github.io/blog/2019/10/30/julialang-features-part-1/) + +```julia +julia> x = [1]; + +julia> objectid(x) +0x79eedc509237c203 + +julia> x .= [10]; # mutating contents + +julia> objectid(x) +0x79eedc509237c203 + +julia> y = 100; + +julia> objectid(y) +0xdb216d4e5c739c77 + +julia> y = y + 100; # rebinding the variable name + +julia> objectid(y) +0xb642af5f06b41e88 +``` + +Since the inner function needs to point to the same location, julia uses `Box` container which can be seen as a translation, where the pointer inside the Box can change while the inner function contains the same pointer to the `Box`. This makes possible to change the captured variables and tracks changes in the point. Sometimes (it can happen many time) the compiler fails to determine that the captured variable is read only, and it wrap it (box it) in the `Box` wrapper, which makes it type unstable, as `Box` does not track types (it would be difficult as even the type can change in the inner function). This is what we can see in the first example of `abmult`. In the second example, the captured variable `y` and `i` changes and the compiler is right. + +What can we do? + +- The first difficulty is to even detect this case. We can spot it using `@code_typed` and of course `JET.jl` can do it and it will warn us. Above we have seen the effect of the profiler. 
+Using `@code_typed` + +```julia +julia> @code_typed abmult(1) +CodeInfo( +1 ─ %1 = Core.Box::Type{Core.Box} +│ %2 = %new(%1, r@_2)::Core.Box +│ %3 = Core.isdefined(%2, :contents)::Bool +└── goto #3 if not %3 +2 ─ goto #4 +3 ─ $(Expr(:throw_undef_if_not, :r, false))::Any +4 ┄ %7 = Core.getfield(%2, :contents)::Any +│ %8 = (%7 < 0)::Any +└── goto #9 if not %8 +5 ─ %10 = Core.isdefined(%2, :contents)::Bool +└── goto #7 if not %10 +6 ─ goto #8 +7 ─ $(Expr(:throw_undef_if_not, :r, false))::Any +8 ┄ %14 = Core.getfield(%2, :contents)::Any +│ %15 = -%14::Any +└── Core.setfield!(%2, :contents, %15)::Any +9 ┄ %17 = %new(Main.:(var"#5#6"), %2)::var"#5#6" +└── return %17 +) => var"#5#6" +``` + +Using `Jet.jl` (recall it requires the very latest Julia 1.7) + +``` +julia> @report_opt abmult(1) +═════ 3 possible errors found ═════ +┌ @ REPL[15]:2 r = Core.Box(:(_7::Int64)) +│ captured variable `r` detected +└────────────── +┌ @ REPL[15]:2 Main.<(%7, 0) +│ runtime dispatch detected: Main.<(%7::Any, 0) +└────────────── +┌ @ REPL[15]:3 Main.-(%14) +│ runtime dispatch detected: Main.-(%14::Any) +└────────────── +``` + +- Sometimes, we do not have to do anything. For example the above example of `evalcb` function, we assume that all the other code in the simulation would take much more time so a little type instability is not important. +- Alternatively, we can explicitly use `Ref` instead of the `Box`, which are typed wrappers, but they are awkward to use. 
+ +```julia +function ref_abmult(r::Int) + if r < 0 + r = -r + end + rr = Ref(r) + f = x -> x * rr[] + return f +end; +``` + +We can see in `@code_typed` that the compiler is happy as it can resolve the types correctly + +```julia +julia> @code_typed ref_abmult(1) +CodeInfo( +1 ─ %1 = Base.slt_int(r@_2, 0)::Bool +└── goto #3 if not %1 +2 ─ %3 = Base.neg_int(r@_2)::Int64 +3 ┄ %4 = φ (#2 => %3, #1 => _2)::Int64 +│ %5 = %new(Base.RefValue{Int64}, %4)::Base.RefValue{Int64} +│ %6 = %new(var"#7#8"{Base.RefValue{Int64}}, %5)::var"#7#8"{Base.RefValue{Int64}} +└── return %6 +) => var"#7#8"{Base.RefValue{Int64}} +``` + +Jet is also happy. + +```julia + +julia> @report_opt ref_abmult(1) +No errors ! +``` + +So when you use closures, you should be careful of the accidental boxing, since it can inhibit the speed of code. **This is a big deal in Multithreadding and in automatic differentiation**, both heavily uses closures. You can track the discussion [here](https://github.com/JuliaLang/julia/issues/15276). + + +## NamedTuples are more efficient that Dicts + +It happens a lot in scientific code, that some experiments have many parameters. It is therefore very convenient to store them in `Dict`, such that when adding a new parameter, we do not have to go over all defined functions and redefine them. + +Imagine that we have a (nonsensical) simulation like + +```julia +settings = Dict(:stepsize => 0.01, :h => 0.001, :iters => 500, :info => "info") +function find_min!(f, x, p) + for i in 1:p[:iters] + x̃ = x + p[:h] + fx = f(x) # line 4 + x -= p[:stepsize] * (f(x̃) - fx)/p[:h] # line 5 + end + x +end +``` + +Notice the parameter `p` is a `Dict` and that it can contain arbitrary parameters, which is useful. Hence, `Dict` is cool for passing parameters. 
+Let's now run the function through the profiler + +```julia +x₀ = rand() +f(x) = x^2 +prof = @profview find_min!(f, x₀, settings) +ProfileCanvas.html_file("profiles/profile6.html", prof) +``` + +from the profiler's output [here](profiles/profile6.html) we can see some type instabilities. Where they come from? The compiler does not have any information about types stored in `settings`, as the type of stored values are `Any` (caused by storing `String` and `Int`). + +```julia +julia> typeof(settings) +Dict{Symbol, Any} +``` + +The second problem is `get` operation on dictionaries is very time consuming operation (although technically it is O(1)), because it has to search the key in the list. `Dict`s are designed as a mutable container, which is not needed in our use-case, as the settings are static. For similar use-cases, Julia offers `NamedTuple`, with which we can construct settings as + +```julia +nt_settings = (;stepsize = 0.01, h=0.001, iters=500, :info => "info") +``` + +The `NamedTuple` is fully typed, but which we mean the names of fields are part of the type definition and fields are also part of type definition. You can think of it as a struct. Moreover, when accessing fields in `NamedTuple`, compiler knows precisely where they are located in the memory, which drastically reduces the access time. + + +Let's see the effect in `BenchmarkTools`. + +```julia +julia> @benchmark find_min!(x -> x^2, x₀, settings) +BenchmarkTools.Trial: 10000 samples with 1 evaluation. + Range (min … max): 86.350 μs … 4.814 ms ┊ GC (min … max): 0.00% … 97.61% + Time (median): 90.747 μs ┊ GC (median): 0.00% + Time (mean ± σ): 102.405 μs ± 127.653 μs ┊ GC (mean ± σ): 4.69% ± 3.75% + + ▅██▆▂ ▁▁ ▁ ▂ + ███████▇▇████▇███▇█▇████▇▇▆▆▇▆▇▇▇▆▆▆▆▇▆▇▇▅▇▆▆▆▆▄▅▅▄▅▆▆▅▄▅▃▅▃▅ █ + 86.4 μs Histogram: log(frequency) by time 209 μs < + + Memory estimate: 70.36 KiB, allocs estimate: 4002. + +julia> @benchmark find_min!(x -> x^2, x₀, nt_settings) +BenchmarkTools.Trial: 10000 samples with 7 evaluations. 
+ Range (min … max): 4.179 μs … 21.306 μs ┊ GC (min … max): 0.00% … 0.00% + Time (median): 4.188 μs ┊ GC (median): 0.00% + Time (mean ± σ): 4.493 μs ± 1.135 μs ┊ GC (mean ± σ): 0.00% ± 0.00% + + █▃▁ ▁ ▁ ▁ ▁ + ████▇████▄██▄█▃██▄▄▇▇▇▇▅▆▆▅▄▄▅▄▅▅▅▄▁▅▄▁▄▄▆▆▇▄▅▆▄▄▃▄▆▅▆▁▄▄▄ █ + 4.18 μs Histogram: log(frequency) by time 10.8 μs < + + Memory estimate: 16 bytes, allocs estimate: 1. +``` + +Checking the output with JET, there is no type instability anymore + +```julia +@report_opt find_min!(f, x₀, nt_settings) +No errors ! +``` + +## Don't use IO unless you have to + +- debug printing in performance critical code should be kept to minimum or using in memory/file based logger in stdlib `Logging.jl` + +```julia +function find_min!(f, x, p; verbose=true) + for i in 1:p[:iters] + x̃ = x + p[:h] + fx = f(x) + x -= p[:stepsize] * (f(x̃) - fx)/p[:h] + verbose && println("x = ", x, " | f(x) = ", fx) + end + x +end + +@btime find_min!($f, $x₀, $params_tuple; verbose=true) +@btime find_min!($f, $x₀, $params_tuple; verbose=false) +``` + +- [interpolation of strings is even worse](https://docs.julialang.org/en/v1/manual/performance-tips/#Avoid-string-interpolation-for-I/O) + +```julia +function find_min!(f, x, p; verbose=true) + for i in 1:p[:iters] + x̃ = x + p[:h] + fx = f(x) + x -= p[:stepsize] * (f(x̃) - fx)/p[:h] + verbose && println("x = $x | f(x) = $fx") + end + x +end +@btime find_min!($f, $x₀, $params_tuple; verbose=true) +``` \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_05/poly_horner.png b/docs_vitepress/src/lectures/lecture_05/poly_horner.png new file mode 100644 index 00000000..05418fb5 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_05/poly_horner.png differ diff --git a/docs_vitepress/src/lectures/lecture_05/poly_stable.png b/docs_vitepress/src/lectures/lecture_05/poly_stable.png new file mode 100644 index 00000000..9d84b407 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_05/poly_stable.png differ diff --git 
a/docs_vitepress/src/lectures/lecture_05/poly_unstable.png b/docs_vitepress/src/lectures/lecture_05/poly_unstable.png new file mode 100644 index 00000000..e000409e Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_05/poly_unstable.png differ diff --git a/docs_vitepress/src/lectures/lecture_05/profiles/profile.html b/docs_vitepress/src/lectures/lecture_05/profiles/profile.html new file mode 100644 index 00000000..276af1cd --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/profiles/profile.html @@ -0,0 +1,26 @@ + + + + + +
+ + + + diff --git a/docs_vitepress/src/lectures/lecture_05/profiles/profile2.html b/docs_vitepress/src/lectures/lecture_05/profiles/profile2.html new file mode 100644 index 00000000..88c63f57 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/profiles/profile2.html @@ -0,0 +1,26 @@ + + + + + +
+ + + + diff --git a/docs_vitepress/src/lectures/lecture_05/profiles/profile3.html b/docs_vitepress/src/lectures/lecture_05/profiles/profile3.html new file mode 100644 index 00000000..edd4e176 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/profiles/profile3.html @@ -0,0 +1,26 @@ + + + + + +
+ + + + diff --git a/docs_vitepress/src/lectures/lecture_05/profiles/profile4.html b/docs_vitepress/src/lectures/lecture_05/profiles/profile4.html new file mode 100644 index 00000000..f6d4cea0 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/profiles/profile4.html @@ -0,0 +1,26 @@ + + + + + +
+ + + + diff --git a/docs_vitepress/src/lectures/lecture_05/profiles/profile5.html b/docs_vitepress/src/lectures/lecture_05/profiles/profile5.html new file mode 100644 index 00000000..b22e4f24 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/profiles/profile5.html @@ -0,0 +1,26 @@ + + + + + +
+ + + + diff --git a/docs_vitepress/src/lectures/lecture_05/profiles/profile6.html b/docs_vitepress/src/lectures/lecture_05/profiles/profile6.html new file mode 100644 index 00000000..a7056214 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/profiles/profile6.html @@ -0,0 +1,26 @@ + + + + + +
+ + + + diff --git a/docs_vitepress/src/lectures/lecture_05/root_finding.jl b/docs_vitepress/src/lectures/lecture_05/root_finding.jl new file mode 100644 index 00000000..31ca23f9 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/root_finding.jl @@ -0,0 +1,112 @@ +using LinearAlgebra +using Printf + +function _polynomial(a, x) + accumulator = a[end] * one(x) + for i in length(a)-1:-1:1 + accumulator = accumulator * x + a[i] + end + accumulator +end + +# definition of polynom +struct Polynom{C} + coefficients::C + Polynom(coefficients::CC) where CC = coefficients[end] == 0 ? throw(ArgumentError("Coefficient of the highest exponent cannot be zero.")) : new{CC}(coefficients) +end + +# based on https://github.com/JuliaMath/Polynomials.jl +function from_roots(roots::AbstractVector{T}; aₙ = one(T)) where {T} + n = length(roots) + c = zeros(T, n+1) + c[1] = one(T) + for j = 1:n + for i = j:-1:1 + c[i+1] = c[i+1]-roots[j]*c[i] + end + end + return Polynom(aₙ.*reverse(c)) +end + +(p::Polynom)(x) = _polynomial(p.coefficients, x) +degree(p::Polynom) = length(p.coefficients) - 1 + +function _derivativeof(p::Polynom) + n = degree(p) + n > 1 ? Polynom([(i - 1)*p.coefficients[i] for i in 2:n+1]) : error("Low degree of a polynomial.") +end +LinearAlgebra.adjoint(p::Polynom) = _derivativeof(p) + +function Base.show(io::IO, p::Polynom) + n = degree(p) + a = reverse(p.coefficients) + for (i, c) in enumerate(a[1:end-1]) + if (c != 0) + c < 0 && print(io, " - ") + c > 0 && i > 1 && print(io, " + ") + print(io, "$(abs(c))x^$(n - i + 1)") + end + end + c = a[end] + c > 0 && print(io, " + $(c)") + c < 0 && print(io, " - $(abs(c))") +end + +# default optimization parameters +atol = 1e-12 +maxiter = 100 +stepsize = 0.95 + +# definition of optimization methods +abstract type RootFindingMethod end +struct Newton <: RootFindingMethod end +struct Secant <: RootFindingMethod end +struct Bisection <: RootFindingMethod end + +init!(::Bisection, p, a, b) = sign(p(a)) != sign(p(b)) ? 
(a, b) : throw(ArgumentError("Signs at both ends are the same.")) +init!(::RootFindingMethod, p, a, b) = (a, b) + +function step!(::Newton, poly::Polynom, xᵢ, step_size) + _, x̃ = xᵢ + dp = p' + x̃, x̃ - step_size*p(x̃)/dp(x̃) +end + +function step!(::Secant, poly::Polynom, xᵢ, step_size) + x, x̃ = xᵢ + dpx = (p(x) - p(x̃))/(x - x̃) + x̃, x̃ - stepsize*p(x̃)/dpx +end + +function step!(::Bisection, poly::Polynom, xᵢ, step_size) + x, x̃ = xᵢ + midpoint = (x + x̃)/2 + if sign(p(midpoint)) == sign(p(x̃)) + x̃ = midpoint + else + x = midpoint + end + x, x̃ +end + +function find_root(p::Polynom, rfm=Newton, a=-5.0, b=5.0, max_iter=100, step_size=0.95, tol=1e-12) + x, x̃ = init!(rfm, p, a, b) + for i in 1:maxiter + x, x̃ = step!(rfm, p, (x, x̃), step_size) + val = p(x̃) + @printf "x = %.5f | x̃ = %.5f | p(x̃) = %g\n" x x̃ val + abs(val) < atol && return x̃ + end + println("Method did not converge in $(max_iter) iterations to a root within $(tol) tolerance.") + return x̃ +end + +# test code +poly = Polynom(rand(4)) +p = from_roots([-3, -2, -1, 0, 1, 2, 3]) +dp = p' +p(3.0), dp(3.0) + +x₀ = find_root(p, Bisection(), -5.0, 5.0) +x₀ = find_root(p, Newton(), -5.0, 5.0) +x₀ = find_root(p, Secant(), -5.0, 5.0) diff --git a/docs_vitepress/src/lectures/lecture_05/sim.jl b/docs_vitepress/src/lectures/lecture_05/sim.jl new file mode 100644 index 00000000..c08c1577 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_05/sim.jl @@ -0,0 +1,68 @@ +using BenchmarkTools + +function polynomial(a, x) + accumulator = 0 + for i in length(a):-1:1 + accumulator += x^(i-1) * a[i] # ! 
1-based indexing for arrays + end + return accumulator +end + +function polynomial_stable(a, x) + accumulator = zero(x) + for i in length(a):-1:1 + accumulator += x^(i-1) * a[i] + end + accumulator +end + +function run_polynomial_stable(a, x, n) + for _ in 1:n + polynomial_stable(a, x) + end +end + +function run_polynomial(a, x, n) + for _ in 1:n + polynomial(a, x) + end +end + +function polynomial_horner(a, x) + accumulator = a[end] * one(x) + for i in length(a)-1:-1:1 + accumulator = accumulator * x + a[i] + end + accumulator +end +function run_polynomial_horner(a, x, n) + for _ in 1:n + polynomial_horner(a, x) + end +end + + + +a = rand(-10:10, 1000) # using longer polynomial +xf = 3.0 + + +run_polynomial(a, xf, Int(1e5)) +@profview run_polynomial(a, xf, Int(1e5)) + +run_polynomial_stable(a, xf, Int(1e5)) +@profview run_polynomial_stable(a, xf, Int(1e5)) + +run_polynomial_horner(a, xf, Int(1e5)) +@profview run_polynomial_horner(a, xf, Int(1e5)) + +# a = rand(-10:10, 100) +# polynomial(a,3) +# @profview for _ in 1:100000 polynomial(a, 3) end +# +# polynomial(a,3.0) +# #@profview for _ in 1:100000 polynomial(a, 3.0) end +# @btime for _ in 1:100000 polynomial($a, 3.0) end +# +# # @time for _ in 1:1000 polynomial(a, 3.0) end +# diff --git a/docs_vitepress/src/lectures/lecture_06/hw.md b/docs_vitepress/src/lectures/lecture_06/hw.md new file mode 100644 index 00000000..0e7aee8e --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_06/hw.md @@ -0,0 +1,48 @@ +# Homework 6: Find variables + +Following the lab exercises, you may think that metaprogramming is a fun little exercise. Let's challenge this notion in this homework, where *YOU* are being trusted with catching all the edge cases in an AST. + +## How to submit? + +Put the code of the compulsory task inside `hw.jl`. Zip only this file (not its parent folder) and upload it to BRUTE. Your file should not use any 3rd party dependency. 
+ +::: danger Homework (2 points) + +Your task is to find all single letter variables in an expression, i.e. for example when given expression + +```julia +x + 2*y*z - c*x +``` + +return an array of *unique alphabetically sorted symbols* representing variables in an expression. + +```julia +[:c, :x, :y, :z] +``` + +Implement this in a function called `find_variables`. Note that there may be some edge cases that you may have to handle in a special way, such as + +- variable assignments `r = x*x` should return the variable on the left as well (`r` in this case) +- ignoring symbols representing single letter function calls such as `f(x)` + +::: + +::: details Show solution + +Nothing to see here. + +::: + +# Voluntary exercise + +::: danger Voluntary exercise + +Create a function that replaces each of `+`, `-`, `*` and `/` with the respective checked operation, which checks for overflow. E.g. `+` should be replaced by `Base.checked_add`. + +::: + +::: details Show solution + +Not yet published. + +::: \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_06/lab.md b/docs_vitepress/src/lectures/lecture_06/lab.md new file mode 100644 index 00000000..5be12a46 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_06/lab.md @@ -0,0 +1,450 @@ +# [Lab 06: Code introspection and metaprogramming](@id introspection_lab) + +In this lab we are first going to inspect some tooling to help you understand what Julia does under the hood such as: + +- looking at the code at different levels +- understanding what method is being called +- showing different levels of code optimization + +Secondly we will start playing with the metaprogramming side of Julia, mainly covering: + +- how to view abstract syntax tree (AST) of Julia code +- how to manipulate AST + +These topics will be extended in the next lecture/lab, where we are going use metaprogramming to manipulate code with macros. 
+ +We will be again a little getting ahead of ourselves as we are going to use quite a few macros, which will be properly explained in the next lecture as well, however for now the important thing to know is that a macro is just a special function, that accepts as an argument Julia code, which it can modify. + +## Quick reminder of introspection tooling + +Let's start with the topic of code inspection, e.g. we may ask the following: What happens when Julia evaluates `[i for i in 1:10]`? + +- parsing +```@repl lab06_intro +using InteractiveUtils #hide +:([i for i in 1:10]) |> dump +``` + +- lowering +```@repl lab06_intro +Meta.@lower [i for i in 1:10] +``` + +- typing +```@repl lab06_intro +f() = [i for i in 1:10] +@code_typed f() +``` + +- LLVM code generation +```@repl lab06_intro +@code_llvm f() +``` + +- native code generation +```@repl lab06_intro +@code_native f() +``` + +Let's see how these tools can help us understand some of Julia's internals on examples from previous labs and lectures. + +### Understanding runtime dispatch and type instabilities + +We will start with a question: Can we spot internally some difference between type stable/unstable code? + + +::: warning Exercise + +Inspect the following two functions using `@code_lowered`, `@code_typed`, `@code_llvm` and `@code_native`. +```@example lab06_intro +x = rand(10^5) +function explicit_len(x) + length(x) +end + +function implicit_len() + length(x) +end +nothing #hide +``` +For now do not try to understand the details, but focus on the overall differences such as length of the code. 
+ +::: + +::: tip Redirecting `stdout` + +If the output of the method introspection tools is too long you can use a general way of redirecting standard output `stdout` to a file + +```julia +open("./llvm_fun.ll", "w") do file + original_stdout = stdout + redirect_stdout(file) + @code_llvm fun() + redirect_stdout(original_stdout) +end +``` + +In case of `@code_llvm` and `@code_native` there are special options, that allow this out of the box, see help `?` for underlying `code_llvm` and `code_native`. If you don't mind adding dependencies there is also the `@capture_out` from [`Suppressor.jl`](https://github.com/JuliaIO/Suppressor.jl) + +::: + +::: details Show solution + +```julia +@code_warntype explicit_len(x) +@code_warntype implicit_len() + +@code_typed explicit_len(x) +@code_typed implicit_len() + +@code_llvm explicit_len(x) +@code_llvm implicit_len() + +@code_native explicit_len(x) +@code_native implicit_len() +``` + +In this case we see that the generated code for such a simple operation is much longer in the type unstable case resulting in longer run times. However in the next example we will see that having longer code is not always a bad thing. + +::: + +### Loop unrolling + +In some cases the compiler uses loop unrolling[^1] optimization to speed up loops at the expense of binary size. The result of such optimization is removal of the loop control instructions and rewriting the loop into a repeated sequence of independent statements. + +[^1]: [https://en.wikipedia.org/wiki/Loop_unrolling](https://en.wikipedia.org/wiki/Loop\_unrolling) + +::: warning Exercise + +Inspect under what conditions the compiler unrolls the for loop in the `polynomial` function from the last [lab](@ref horner). + +```@example lab06_intro +function polynomial(a, x) + accumulator = a[end] * one(x) + for i in length(a)-1:-1:1 + accumulator = accumulator * x + a[i] + end + accumulator +end +nothing #hide +``` + +Compare the speed of execution with and without loop unrolling. 
+ +**HINTS**: + +- these kind of optimization are lower level than intermediate language +- loop unrolling is possible when compiler knows the length of the input + +::: + +::: details Show solution + +```@example lab06_intro +using Test #hide +using BenchmarkTools +a = Tuple(ones(20)) # tuple has known size +ac = collect(a) +x = 2.0 + +@code_lowered polynomial(a,x) # cannot be seen here as optimizations are not applied +@code_typed polynomial(a,x) # loop unrolling is not part of type inference optimization +nothing #hide +``` + +```@repl lab06_intro +@code_llvm polynomial(a,x) +@code_llvm polynomial(ac,x) +``` + +More than 2x speedup + +```@repl lab06_intro +@btime polynomial($a,$x) +@btime polynomial($ac,$x) +``` + +::: + +### Recursion inlining depth + +Inlining[^2] is another compiler optimization that allows us to speed up the code by avoiding function calls. Where applicable compiler can replace `f(args)` directly with the function body of `f`, thus removing the need to modify stack to transfer the control flow to a different place. This is yet another optimization that may improve speed at the expense of binary size. + +[^2]: [https://en.wikipedia.org/wiki/Inline_expansion](https://en.wikipedia.org/wiki/Inline\_expansion) + +::: warning Exercise + +Rewrite the `polynomial` function from the last [lab](@ref horner) using recursion and find the length of the coefficients, at which inlining of the recursive calls stops occurring. 
+ +```julia +function polynomial(a, x) + accumulator = a[end] * one(x) + for i in length(a)-1:-1:1 + accumulator = accumulator * x + a[i] + end + accumulator +end +``` + +**HINTS**: +- define two methods `_polynomial!(ac, x, a...)` and `_polynomial!(ac, x, a)` for the case of ≥2 coefficients and the last coefficient +- use splatting together with range indexing `a[1:end-1]...` +- the correctness can be checked using the built-in `evalpoly` +- recall that these kind of optimization are possible just around the type inference stage +- use container of known length to store the coefficients + +::: + +::: tip Splatting/slurping operator `...` + +The operator `...` serves two purposes inside function calls [^3][^4]: + +- combines multiple arguments into one +```@repl lab06_splat +function printargs(args...) + println(typeof(args)) + for (i, arg) in enumerate(args) + println("Arg #$i = $arg") + end +end +printargs(1, 2, 3) +``` + +- splits one argument into many different arguments +```@repl lab06_splat +function threeargs(a, b, c) + println("a = $a::$(typeof(a))") + println("b = $b::$(typeof(b))") + println("c = $c::$(typeof(c))") +end +threeargs([1,2,3]...) # or with a variable threeargs(x...) +``` + +[^3]: [https://docs.julialang.org/en/v1/manual/faq/#What-does-the-...-operator-do?](https://docs.julialang.org/en/v1/manual/faq/#What-does-the-...-operator-do?) +[^4]: [https://docs.julialang.org/en/v1/manual/functions/#Varargs-Functions](https://docs.julialang.org/en/v1/manual/functions/#Varargs-Functions) + +::: + +::: details Show solution + +```@example lab06_intro +_polynomial!(ac, x, a...) = _polynomial!(x * ac + a[end], x, a[1:end-1]...) +_polynomial!(ac, x, a) = x * ac + a +polynomial(a, x) = _polynomial!(a[end] * one(x), x, a[1:end-1]...) 
+ +# the coefficients have to be a tuple +a = Tuple(ones(Int, 21)) # everything less than 22 gets inlined +x = 2 +polynomial(a,x) == evalpoly(x,a) # compare with built-in function + +# @code_llvm polynomial(a,x) # seen here too, but code_typed is a better option +@code_lowered polynomial(a,x) # cannot be seen here as optimizations are not applied +nothing #hide +``` + +```@repl lab06_intro +@code_typed polynomial(a,x) +``` + +::: + +## AST manipulation: The first steps to metaprogramming + +Julia is so called homoiconic language, as it allows the language to reason about its code. This capability is inspired by years of development in other languages such as Lisp, Clojure or Prolog. + +There are two easy ways to extract/construct the code structure [^5] +- parsing code stored in string with internal `Meta.parse` + +```@repl lab06_meta +code_parse = Meta.parse("x = 2") # for single line expressions (additional spaces are ignored) +code_parse_block = Meta.parse(""" +begin + x = 2 + y = 3 + x + y +end +""") # for multiline expressions +``` + +- constructing an expression using `quote ... end` or simple `:()` syntax +```@repl lab06_meta +code_expr = :(x = 2) # for single line expressions (additional spaces are ignored) +code_expr_block = quote + x = 2 + y = 3 + x + y +end # for multiline expressions +``` + +Results can be stored into some variables, which we can inspect further. + +```@repl lab06_meta +typeof(code_parse) +dump(code_parse) +``` + +```@repl lab06_meta +typeof(code_parse_block) +dump(code_parse_block) +``` + +The type of both multiline and single line expression is `Expr` with fields `head` and `args`. Notice that `Expr` type is recursive in the `args`, which can store other expressions resulting in a tree structure - abstract syntax tree (AST) - that can be visualized for example with the combination of `GraphRecipes` and `Plots` packages. 
+ +```@example lab06_meta +using GraphRecipes #hide +using Plots #hide +plot(code_expr_block, fontsize=12, shorten=0.01, axis_buffer=0.15, nodeshape=:rect) +``` + +This recursive structure has some major performance drawbacks, because the `args` field is of type `Any` and therefore modifications of this expression level AST won't be type stable. Building blocks of expressions are `Symbol`s and literal values (numbers). + +A possible nuisance of working with multiline expressions is the presence of `LineNumber` nodes, which can be removed with `Base.remove_linenums!` function. + +```@repl lab06_meta +Base.remove_linenums!(code_parse_block) +``` + +Parsed expressions can be evaluate using `eval` function. + +```@repl lab06_meta +eval(code_parse) # evaluation of :(x = 2) +x # should be defined +``` + +::: warning Exercise + +Before doing anything more fancy let's start with some simple manipulation of ASTs. + +- Define a variable `code` to be as the result of parsing the string `"j = i^2"`. +- Copy code into a variable `code2`. Modify this to replace the power `2` with a power `3`. Make sure that the original code variable is not also modified. +- Copy `code2` to a variable `code3`. Replace `i` with `i + 1` in `code3`. +- Define a variable `i` with the value `4`. Evaluate the different code expressions using the `eval` function and check the value of the variable `j`. + +::: + +::: details Show solution + +```@repl lab06_meta +code = Meta.parse("j = i^2") +code2 = copy(code) +code2.args[2].args[3] = 3 +code3 = copy(code2) +code3.args[2].args[2] = :(i + 1) +i = 4 +eval(code), eval(code2), eval(code3) +``` + +::: + +Following up on the more general substitution of variables in an expression from the lecture, let's see how the situation becomes more complicated, when we are dealing with strings instead of a parsed AST. + +::: warning Exercise + +```@example lab06_meta +using Test #hide +replace_i(s::Symbol) = s == :i ? 
:k : s +replace_i(e::Expr) = Expr(e.head, map(replace_i, e.args)...) +replace_i(u) = u +nothing #hide +``` + +Given a function `replace_i`, which replaces variables `i` for `k` in an expression like the following + +```@repl lab06_meta +ex = :(i + i*i + y*i - sin(z)) +@test replace_i(ex) == :(k + k*k + y*k - sin(z)) +``` + +write a different function `sreplace_i(s)`, which does the same thing but instead of a parsed expression (AST) it manipulates a string, such as + +```@repl lab06_meta +s = string(ex) +``` + +**HINTS**: + +- Use `Meta.parse` in combination with `replace_i` **ONLY** for checking of correctness. +- You can use the `replace` function in combination with regular expressions. +- Think of some corner cases, that the method may not handle properly. + +::: + +::: details Show solution + +The naive solution + +```@repl lab06_meta +sreplace_i(s) = replace(s, 'i' => 'k') +@test Meta.parse(sreplace_i(s)) == replace_i(Meta.parse(s)) +``` + +does not work in this simple case, because it will replace "i" inside the `sin(z)` expression. We can play with regular expressions to obtain something, that is more robust + +```@repl lab06_meta +sreplace_i(s) = replace(s, r"([^\w]|\b)i(?=[^\w]|\z)" => s"\1k") +@test Meta.parse(sreplace_i(s)) == replace_i(Meta.parse(s)) +``` + +however the code may now be harder to read. Thus it is preferable to use the parsed AST when manipulating Julia's code. + +::: + +If the exercises so far did not feel very useful let's focus on one, that is similar to a part of the [`IntervalArithmetics.jl`](https://github.com/JuliaIntervals/IntervalArithmetic.jl) pkg. + +::: warning Exercise + +Write function `wrap!(ex::Expr)` which wraps literal values (numbers) with a call to `f()`. 
You can test it on the following example + +```@example lab06_meta +f = x -> convert(Float64, x) +ex = :(x*x + 2*y*x + y*y) # original expression +rex = :(x*x + f(2)*y*x + y*y) # result expression +nothing #hide +``` + +**HINTS**: + +- use recursion and multiple dispatch +- dispatch on `::Number` to detect numbers in an expression +- for testing purposes, create a copy of `ex` before mutating + +::: + +::: details Show solution + +```@repl lab06_meta +function wrap!(ex::Expr) + args = ex.args + + for i in 1:length(args) + args[i] = wrap!(args[i]) + end + + return ex +end + +wrap!(ex::Number) = Expr(:call, :f, ex) +wrap!(ex) = ex + +ext, x, y = copy(ex), 2, 3 +@test wrap!(ex) == :(x*x + f(2)*y*x + y*y) +eval(ext) +eval(ex) +``` + +::: + +This kind of manipulation is at the core of some pkgs, such as aforementioned [`IntervalArithmetics.jl`](https://github.com/JuliaIntervals/IntervalArithmetic.jl) where every number is replaced with a narrow interval in order to find some bounds on the result of a computation. + +--- + +[^5]: Once you understand the recursive structure of expressions, the AST can be constructed manually like any other type. + +## Resources + +- Julia's manual on [metaprogramming](https://docs.julialang.org/en/v1/manual/metaprogramming/) +- David P. 
Sanders' [workshop @ JuliaCon 2021](https://www.youtube.com/watch?v=2QLhw6LVaq0) +- Steven Johnson's [keynote talk @ JuliaCon 2019](https://www.youtube.com/watch?v=mSgXWpvQEHE) +- Andy Ferris's [workshop @ JuliaCon 2018](https://www.youtube.com/watch?v=SeqAQHKLNj4) +- [From Macros to DSL](https://github.com/johnmyleswhite/julia_tutorials) by John Myles White +- Notes on [JuliaCompilerPlugin](https://hackmd.io/bVhb97Q4QTWeBQw8Rq4IFw?both#Julia-Compiler-Plugin-Project) diff --git a/docs_vitepress/src/lectures/lecture_06/lecture.md b/docs_vitepress/src/lectures/lecture_06/lecture.md new file mode 100644 index 00000000..23c5289f --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_06/lecture.md @@ -0,0 +1,839 @@ +# [Language introspection](@id introspection) + +**What is metaprogramming?** *A high-level code that writes high-level code* by Steven Johnson. + +**Why do we need metaprogramming?** + +- In general, we do not need it, as we can do whatever we need without it, but it can help us to remove a boilerplate code. + + As an example, consider a `@show` macro, which just prints the name of the variable (or the expression) and its evaluation. This means that instead of writing `println("2+exp(4) = ", 2+exp(4))` we can just write `@show 2+exp(4)`. + + We have seen `@time` or `@benchmark`, which is difficult to implement using normal function, since when you pass `2+exp(4)` as a function argument, it will be automatically evaluated. You need to pass it as an expression, that can be evaluated within the function. + + `@chain` macro from [`Chain.jl`](https://github.com/jkrumbiegel/Chain.jl) improves over native piping `|>` + + We have seen `@forward` macro implementing **encapsulation**. + + Macros are used to insert compilation directives not accessible through the syntax, e.g. `@inbounds`. +- A chapter on its own is definition of **Domain Specific Languages**. 
+ + +## Translation stages from source code to machine code + +Julia (as any modern compiler) uses several stages to convert source code to native code. + +- Julia's steps consist of [^1] + 1. parse the source code to **abstract syntax tree** (AST) --- `parse` function + 2. expand macros --- `macroexpand` function + 3. syntax desugaring + 4. statementize control flow + 5. resolve scopes + 6. generate intermediate representation (IR) ("goto" form) --- `expand` or `code_lowered` functions + 7. top-level evaluation, method sorting --- `methods` + 8. type inference + 9. inlining and high level optimization --- `code_typed` + 10. LLVM IR generation --- `code_llvm` + 11. LLVM optimizer, native code generation --- `code_native` +- steps 3-6 are done in an inseparable stage +- Julia's IR is in [static single assignment form](https://en.wikipedia.org/wiki/Static_single_assignment_form) + +### Example: Fibonacci numbers + +Consider for example a function computing the Fibonacci numbers[^1] + +```julia +function nextfib(n) + a, b = one(n), one(n) + while b < n + a, b = b, a + b + end + return b +end +``` + +[^1]: From [StackOverflow ](https://stackoverflow.com/questions/43453944/what-is-the-difference-between-code-native-code-typed-and-code-llvm-in-julia) + + +#### Parsing + +The first thing the compiler does is that it will parse the source code (represented as a string) to the abstract syntax tree (AST). We can inspect the results of this stage as + +```julia +julia> parsed_fib = Meta.parse( +""" + function nextfib(n) + a, b = one(n), one(n) + while b < n + a, b = b, a + b + end + return b + end""") +:(function nextfib(n) + #= none:1 =# + #= none:2 =# + (a, b) = (one(n), one(n)) + #= none:3 =# + while b < n + #= none:4 =# + (a, b) = (b, a + b) + end + #= none:6 =# + return b + end) +``` + +AST is a tree representation of the source code, where the parser has already identified individual code elements such as function calls, argument blocks, etc. 
The parsed code is represented by Julia objects, therefore it can be read and modified by Julia from Julia at your wish (this is what is called homo-iconicity of a language the itself being derived from Greek words *homo*- meaning "the same" and *icon* meaning "representation"). Using `TreeView` + +```julia +using TreeView, TikzPictures +g = tikz_representation(walk_tree(parsed_fib)) +TikzPictures.save(SVG("parsed_fib.svg"), g) +``` + +![parsed_fib.svg](parsed_fib.svg) + +We can see that the AST is indeed a tree, with `function` being the root node (caused by us parsing a function). Each inner node represents a function call with children of the inner node being its arguments. An interesting inner node is the `Block` representing a sequence of statements, where we can also see information about lines in the source code inserted as comments. Lisp-like S-Expression can be printed using `Meta.show_sexpr(parsed_fib)`. + +```julia +(:function, (:call, :nextfib, :n), (:block, + :(#= none:1 =#), + :(#= none:2 =#), + (:(=), (:tuple, :a, :b), (:tuple, (:call, :one, :n), (:call, :one, :n))), + :(#= none:3 =#), + (:while, (:call, :<, :b, :n), (:block, + :(#= none:4 =#), + (:(=), (:tuple, :a, :b), (:tuple, :b, (:call, :+, :a, :b))) + )), + :(#= none:6 =#), + (:return, :b) + )) +``` + +### Expanding macros + +If we insert a "useless" macro to `nextfib`, for example `@show b`, we see that the macro is not expanded and it is left there as-is. 
+ +```julia +julia> parsed_fib = Meta.parse( +""" + function nextfib(n) + a, b = one(n), one(n) + while b < n + a, b = b, a + b + end + @show b + return b + end""") +:(function nextfib(n) + #= none:1 =# + #= none:2 =# + (a, b) = (one(n), one(n)) + #= none:3 =# + while b < n + #= none:4 =# + (a, b) = (b, a + b) + #= none:5 =# + end + #= none:6 =# + #= none:6 =# @show b + #= none:7 =# + return b + end) +``` + +and we can ask for expansion of the macro + +```julia +julia> macroexpand(Main, parsed_fib) +:(function nextfib(n) + #= none:1 =# + #= none:2 =# + (a, b) = (one(n), one(n)) + #= none:3 =# + while b < n + #= none:4 =# + (a, b) = (b, a + b) + #= none:5 =# + end + #= none:6 =# + begin + Base.println("b = ", Base.repr(begin + #= show.jl:1047 =# + local var"#62#value" = b + end)) + var"#62#value" + end + #= none:7 =# + return b + end) +``` + +#### Lowering + +The next stage is **lowering**, where AST is converted to Static Single Assignment Form (SSA), in which "each variable is assigned exactly once, and every variable is defined before it is used". Loops and conditionals are transformed into gotos and labels using a single unless/goto construct (this is not exposed in user-level Julia). + +```julia +julia> @code_lowered nextfib(3) +CodeInfo( +1 ─ %1 = Main.one(n) +│ %2 = Main.one(n) +│ a = %1 +└── b = %2 +2 ┄ %5 = b < n +└── goto #4 if not %5 +3 ─ %7 = b +│ %8 = a + b +│ a = %7 +│ b = %8 +└── goto #2 +4 ─ return b +) +``` + +or alternatively `lowered_fib = Meta.lower(@__MODULE__, parsed_fib)`. + +We can see that + +- compiler has introduced a lot of variables +- `while` (and `for`) loops has been replaced by a `goto`, where `goto` can be conditional + +For inserted debugging information, there is an option to pass keyword argument `debuginfo=:source`. 
+ +```julia +julia> @code_lowered debuginfo=:source nextfib(3) +CodeInfo( + @ none:2 within `nextfib' +1 ─ %1 = Main.one(n) +│ %2 = Main.one(n) +│ a = %1 +└── b = %2 + @ none:3 within `nextfib' +2 ┄ %5 = b < n +└── goto #4 if not %5 + @ none:4 within `nextfib' +3 ─ %7 = b +│ %8 = a + b +│ a = %7 +│ b = %8 +└── goto #2 + @ none:6 within `nextfib' +4 ─ return b +) +``` + +#### Code Typing + +**Code typing** is the process in which the compiler attaches types to variables and tries to infer types of objects returned from called functions. If the compiler fails to infer the returned type, it will give the variable type `Any`, in which case a dynamic dispatch will be used in subsequent operations with the variable. Inspecting typed code is therefore important for detecting type instabilities (the process can be difficult and error prone, fortunately, new tools like `Jet.jl` may simplify this task). The output of typing can be inspected using `@code_typed` macro. If you know the types of function arguments, aka function signature, you can call directly function `InteractiveUtils.code_typed(nextfib, (typeof(3),))`. + +```julia +julia> @code_typed nextfib(3) +CodeInfo( +1 ─ nothing::Nothing +2 ┄ %2 = φ (#1 => 1, #3 => %6)::Int64 +│ %3 = φ (#1 => 1, #3 => %2)::Int64 +│ %4 = Base.slt_int(%2, n)::Bool +└── goto #4 if not %4 +3 ─ %6 = Base.add_int(%3, %2)::Int64 +└── goto #2 +4 ─ return %2 +) => Int64 +``` + +We can see that + +- some calls have been inlined, e.g. `one(n)` was replaced by `1` and the type was inferred as `Int`. +- The expression `b < n` has been replaced with its implementation in terms of the `slt_int` intrinsic ("signed integer less than") and the result of this has been annotated with return type `Bool`. +- The expression `a + b` has been also replaced with its implementation in terms of the `add_int` intrinsic and its result type annotated as Int64. +- And the return type of the entire function body has been annotated as `Int64`. 
+- The phi-instruction `%2 = φ (#1 => 1, #3 => %6)` is a **selector function**, which returns the value depending on which branch you come from. In this case, variable `%2` will have value 1, if the control was transferred from block `#1` and it will have value copied from variable `%6` if the control was transferred from block `3` [see also](https://llvm.org/docs/LangRef.html#phi-instruction). The `φ` stands for *phony* variable. + +When we have called `@code_lower`, the role of types of arguments was in selecting - via multiple dispatch - the appropriate function body among different methods. Contrary in `@code_typed`, the types of parameters determine the choice of inner methods that need to be called (again with multiple dispatch). This process can trigger other optimization, such as inlining, as seen in the case of `one(n)` being replaced with `1` directly, though here this replacement is hidden in the `φ` function. + +Note that the same view of the code is offered by the `@code_warntype` macro, which we have seen in the previous [lecture](@ref perf_lecture). The main difference from `@code_typed` is that it highlights type instabilities with red color and shows only unoptimized view of the code. You can view the unoptimized code with a keyword argument `optimize=false`: + +```julia +julia> @code_typed optimize=false nextfib(3) +CodeInfo( +1 ─ %1 = Main.one(n)::Core.Const(1) +│ %2 = Main.one(n)::Core.Const(1) +│ (a = %1)::Core.Const(1) +└── (b = %2)::Core.Const(1) +2 ┄ %5 = (b < n)::Bool +└── goto #4 if not %5 +3 ─ %7 = b::Int64 +│ %8 = (a + b)::Int64 +│ (a = %7)::Int64 +│ (b = %8)::Int64 +└── goto #2 +4 ─ return b +) => Int64 +``` + +#### Lowering to LLVM IR + +Julia uses the LLVM compiler framework to generate machine code. LLVM stands for low-level virtual machine and it is the basis of many modern compilers (see [wiki](https://en.wikipedia.org/wiki/LLVM)). 
+We can see the textual form of code lowered to LLVM IR by invoking + +```julia +julia> @code_llvm debuginfo=:source nextfib(3) +; @ REPL[10]:1 within `nextfib' +define i64 @julia_nextfib_890(i64 signext %0) { +top: + br label %L2 + +L2: ; preds = %L2, %top + %value_phi = phi i64 [ 1, %top ], [ %1, %L2 ] + %value_phi1 = phi i64 [ 1, %top ], [ %value_phi, %L2 ] +; @ REPL[10]:3 within `nextfib' +; ┌ @ int.jl:83 within `<' + %.not = icmp slt i64 %value_phi, %0 +; └ +; @ REPL[10]:4 within `nextfib' +; ┌ @ int.jl:87 within `+' + %1 = add i64 %value_phi1, %value_phi +; └ +; @ REPL[10]:3 within `nextfib' + br i1 %.not, label %L2, label %L8 + +L8: ; preds = %L2 +; @ REPL[10]:6 within `nextfib' + ret i64 %value_phi +} +``` + +LLVM code can be tricky to understand at first, but one gets used to it. Notice the references to the source code, which help with orientation. From the code above, we may infer + +- the code starts by jumping to label L2, from where it reads values of two variables to two "registers" `value_phi` and `value_phi1` (variables in LLVM start with `%`). +- Both registers are treated as `i64` and initialized by `1`. +- `[ 1, %top ], [ %value_phi, %L2 ]` means that values are initialized as `1` if you come from the label `top` and as value `value_phi` if you come from `%L2`. This is the LLVM's selector (phony `φ`). +- `icmp slt i64 %value_phi, %0` compares the variable `%value_phi` to the content of variable `%0`. Notice the annotation that we are comparing `Int64`. +- `%1 = add i64 %value_phi1, %value_phi` adds two variables `%value_phi1` and `%value_phi`. Note again that we are using `Int64` addition. +- `br i1 %.not, label %L2, label %L8` implements a conditional jump depending on the content of the `%.not` variable. +- `ret i64 %value_phi` returns the value indicating it to be an `Int64`. + +It is not expected that you will be directly operating on the LLVM code, though there are libraries which do that. 
For example `Enzyme.jl` performs automatic differentiation of LLVM code, which has the benefit of being able to take a gradient through `setindex!`. + +#### Producing the native code + +**Native code** The last stage is generation of the native code, which Julia executes. The native code depends on the target architecture (e.g. x86, ARM). As in previous cases, there is a macro for viewing the compiled code, `@code_native` + +```julia +julia> @code_native debuginfo=:source nextfib(3) + .section __TEXT,__text,regular,pure_instructions +; ┌ @ REPL[10]:1 within `nextfib' + movl $1, %ecx + movl $1, %eax + nopw (%rax,%rax) +L16: + movq %rax, %rdx + movq %rcx, %rax +; │ @ REPL[10]:4 within `nextfib' +; │┌ @ int.jl:87 within `+' + addq %rcx, %rdx + movq %rdx, %rcx +; │└ +; │ @ REPL[10]:3 within `nextfib' +; │┌ @ int.jl:83 within `<' + cmpq %rdi, %rax +; │└ + jl L16 +; │ @ REPL[10]:6 within `nextfib' + retq + nopw %cs:(%rax,%rax) +; └ +``` + +and the output is used mainly for debugging / inspection. + +## Looking around the language + +Language introspection is very convenient for investigating how things are implemented and how they are optimized / compiled to the native code. + +::: tip Reminder `@which` + +Though we have already used it quite a few times, recall the very useful macro `@which`, which identifies the concrete function called in a function call. For example `@which mapreduce(sin, +, [1,2,3,4])`. Note again that the macro here is a convenience macro to obtain types of arguments from the expression. Under the hood, it calls `InteractiveUtils.which(function_name, (Base.typesof)(args...))`. Funnily enough, you can call `@which InteractiveUtils.which(+, (Base.typesof)(1,1))` to inspect where `which` is defined. + +::: + +### Broadcasting + +Broadcasting is not a unique concept in programming languages (Python/Numpy, MATLAB), however its implementation in Julia allows one to easily fuse operations. 
+For example + +```julia +x = randn(100) +sin.(x) .+ 2 .* cos.(x) .+ x +``` + +is all computed in a single loop. We can inspect how this is achieved in the lowered code: + +```julia +julia> Meta.@lower sin.(x) .+ 2 .* cos.(x) .+ x +:($(Expr(:thunk, CodeInfo( + @ none within `top-level scope' +1 ─ %1 = Base.broadcasted(sin, x) +│ %2 = Base.broadcasted(cos, x) +│ %3 = Base.broadcasted(*, 2, %2) +│ %4 = Base.broadcasted(+, %1, %3) +│ %5 = Base.broadcasted(+, %4, x) +│ %6 = Base.materialize(%5) +└── return %6 +)))) +``` + +Notice that we have not used the usual `@code_lowered` macro, because the statement to be lowered is not a function call. In these cases, we have to use `Meta.@lower`, which can handle more general program statements. In these cases, we cannot use `@which` either, as that applies to function calls only as well. + +### Generators + +```julia +Meta.@lower [x for x in 1:4] +:($(Expr(:thunk, CodeInfo( + @ none within `top-level scope' +1 ─ %1 = 1:4 +│ %2 = Base.Generator(Base.identity, %1) +│ %3 = Base.collect(%2) +└── return %3 +)))) +``` + +from which we see that the `Generator` is implemented using the combination of `Base.collect`, which is a function collecting items of a sequence, and `Base.Generator(f,x)`, which implements an iterator that applies function `f` on elements of `x` over which it is being iterated. So the almost magical generators have instantly lost their magic. 
+ +### Closures + +```julia +adder(x) = y -> y + x + +julia> @code_lowered adder(5) +CodeInfo( +1 ─ %1 = Main.:(var"#8#9") +│ %2 = Core.typeof(x) +│ %3 = Core.apply_type(%1, %2) +│ #8 = %new(%3, x) +└── return #8 +) +``` + +`Core.apply_type` is one way of constructing an object, briefly mentioned in the description of [object allocation.](https://docs.julialang.org/en/v1/devdocs/object/#Object-allocation) + +### The effect of type-instability + +```julia +struct Wolf + name::String + energy::Int +end + +struct Sheep + name::String + energy::Int +end + +sound(wolf::Wolf) = println(wolf.name, " has howled.") +sound(sheep::Sheep) = println(sheep.name, " has baaed.") +stable_pack = (Wolf("1", 1), Wolf("2", 2), Sheep("3", 3)) +unstable_pack = [Wolf("1", 1), Wolf("2", 2), Sheep("3", 3)] +@code_typed map(sound, stable_pack) +@code_typed map(sound, unstable_pack) +``` + +::: tip Cthulhu.jl + +`Cthulhu.jl` is a library (tool) which simplifies the above, where we want to iteratively dive into functions called in some piece of code (typically some function). `Cthulhu` is different from the normal debugger, since the debugger is executing the code, while `Cthulhu` is just lowering and typing the code and presenting functions (with the types of arguments inferred). + +::: + +```julia +using Cthulhu +@descend map(sound, unstable_pack) +``` + +## General notes on metaprogramming + +According to an excellent [talk](https://www.youtube.com/watch?v=mSgXWpvQEHE) by Steven Johnson, you should use metaprogramming sparingly, because on one hand it's very powerful, but on the other it is generally difficult to read and it can lead to unexpected errors. Julia allows you to interact with the compiler at two different levels. + +1. After the code is parsed to AST, you can modify it directly or through **macros**. +2. When SSA form is being typed, you can create custom functions using the concept of **generated functions** or directly emit intermediate representation. 
+ +More functionalities are coming through the [JuliaCompilerPlugins](https://github.com/JuliaCompilerPlugins) project, but we will not talk about them (yet), as they are not mature yet. + +## What is Quotation? + +When we are doing metaprogramming, we need to somehow tell the compiler that the next block of code is not a normal block of code to be executed, but that it should be interpreted as data and in any sense it should not be evaluated. **Quotation** refers to exactly this syntactic sugar. In Julia, quotation is achieved either through `:(...)` or `quote ... end`. + +Notice the difference between + +```julia +1 + 1 +``` + +and + +```julia +:(1 + 1) +``` + +The type returned by the quotation depends on what is quoted. Observe the returned type of the following quoted code + +```julia +:(1) |> typeof +:(:x) |> typeof +:(1 + x) |> typeof +quote + 1 + x + x + 1 +end |> typeof +``` + +All of these snippets are examples of the quoted code, but only `:(1 + x)` and the quote block produce objects of type `Expr`. An interesting return type is the `QuoteNode`, which allows to insert piece of code which should contain elements that should not be interpolated. Most of the time, quoting returns `Expr`essions. + +## Expressions + +Abstract Syntax Tree, the output of Julia's parser, is expressed using Julia's own datastructures, which means that you can freely manipulate it (and constructed) from the language itself. This property is called **homoiconicity**. Julia's compiler allows you to intercept compilation just after it has parsed the source code. Before we will take advantage of this power, we should get familiar with the strucute of the AST. + +The best way to inspect the AST is through the combination + +- `Meta.parse,` which parses the source code to AST, +- `dump` which print AST to terminal, +- `eval` which evaluates the AST within the current module. 
+ +Let's start by investigating a very simple statement `1 + 1`, whose AST can be constructed either by `Meta.parse("1 + 1")` or `:(1 + 1)` or `quote 1+1 end` (the last one includes also the line information metadata). + +```julia +julia> p = :(1+1) +:(1 + 1) + +julia> typeof(p) +Expr + +julia> dump(p) +Expr + head: Symbol call + args: Array{Any}((3,)) + 1: Symbol + + 2: Int64 1 + 3: Int64 1 +``` + +The parsed code `p` is of type `Expr`, which according to Julia's help[^2] is *a type representing compound expressions in parsed julia code (ASTs). Each expression consists: of a head Symbol identifying which kind of expression it is (e.g. a call, for loop, conditional statement, etc.), and subexpressions (e.g. the arguments of a call). The subexpressions are stored in a Vector{Any} field called args.* If you recall the figure above, where AST was represented as a tree, `head` gives each node the name name `args` are either some parameters of the node, or they point to childs of that node. The interpretation of the node depends on the its type stored in head (note that the word type used here is not in the Julia sense). + +[^2]: Help: [`Core.Expr`](https://docs.julialang.org/en/v1/base/base/#Core.Expr) + +::: tip `Symbol` type + +When manipulations of expressions, we encounter the term `Symbol`. `Symbol` is the smallest atom from which the program (in AST representation) is built. It is used to identify an element in the language, for example variable, keyword or function name. Symbol is not a string, since string represents itself, whereas `Symbol` can represent something else (a variable). An illustrative example[^3] goes as follows. + +```julia +julia> eval(:foo) +ERROR: foo not defined + +julia> foo = "hello" +"hello" + +julia> eval(:foo) +"hello" + +julia> eval("foo") +"foo" +``` + +which shows that what the symbol `:foo` evaluates to depends on what – if anything – the variable `foo` is bound to, whereas "foo" always just evaluates to "foo". 
+ +Symbols can be constructed either by prepending any string with `:` or by calling `Symbol(...)`, which concatenates the arguments and create the symbol out of it. All of the following are symbols + +```julia +julia> :+ +:+ + +julia> :function +:function + +julia> :call +:call + +julia> :x +:x + +julia> Symbol(:Very,"_twisted_",:symbol,"_definition") +:Very_twisted_symbol_definition + +julia> Symbol("Symbol with blanks") +Symbol("Symbol with blanks") +``` + +Symbols therefore allows us to operate with a piece of code without evaluating it. + +In Julia, symbols are "interned strings", which means that compiler attaches each string a unique identifier (integer), such that it can quickly compare them. Compiler uses Symbols exclusively and the important feature is that they can be quickly compared. This is why people like to use them as keys in `Dict`. + +::: + +[^3]: An [example](https://stackoverflow.com/questions/23480722/what-is-a-symbol-in-julia) provided by Stefan Karpinski. + +::: tip `Expr`essions + +From Julia's help[^2]: + +`Expr(head::Symbol, args...)` + +A type representing compound expressions in parsed julia code (ASTs). Each expression consists of a head `Symbol` identifying which kind of expression it is (e.g. a call, for loop, conditional statement, etc.), and subexpressions (e.g. the arguments of a call). +The subexpressions are stored in a `Vector{Any}` field called args. + +The expression is simple yet very flexible. The head `Symbol` tells how the expression should be treated and arguments provide all needed parameters. Notice that the structure is also type-unstable. This is not a big deal, since the expression is used to generate code, hence it is not executed repeatedly. 
+ +::: + +## Construct code from scratch + +Since `Expr` is a Julia structure, we can construct it manually as we can construct any other structure + +```julia +julia> Expr(:call, :+, 1 , 1) |> dump +Expr + head: Symbol call + args: Array{Any}((3,)) + 1: Symbol + + 2: Int64 1 + 3: Int64 1 +``` + +yielding to the same structure as we have created above. +Expressions can be evaluated using `eval`, as has been said. to programmatically evaluate our expression, let's do + +```julia +e = Expr(:call, :+, 1, 1) +eval(e) +``` + +We are free to use variables (identified by symbols) inside the expression + +```julia +e = Expr(:call, :+, :x, 5) +eval(e) +``` + +but unless they are not defined within the scope, the expression cannot produce a meaningful result + +```julia +x = 3 +eval(e) +``` + +```julia +:(1 + sin(x)) == Expr(:call, :+, 1, Expr(:call, :sin, :x)) +``` + +Since the expression is a Julia structure, we are free to manipulate it. Let's for example substitutue `x` in `e = :(x + 5)` with `2x`. + +```julia +e = :(x + 5) +e.args = map(e.args) do a + a == :x ? Expr(:call, :*, 2, :x) : a +end +``` + +or + +```julia +e = :(x + 5) +e.args = map(e.args) do a + a == :x ? :(2*x) : a +end +``` + +and verify that the results are correct. + +```julia +julia> dump(e) +Expr + head: Symbol call + args: Array{Any}((3,)) + 1: Symbol + + 2: Expr + head: Symbol call + args: Array{Any}((3,)) + 1: Symbol * + 2: Int64 2 + 3: Symbol x + 3: Int64 5 + +julia> eval(e) +11 +``` + +As already mentioned, the manipulation of Expression can be arbitrary. In the above example, we have been operating directly on the arguments. But what if `x` would be deeper in the expression, as for example in `2(3 + x) + 2(2 - x) `? We can implement the substitution using multiple dispatch as we would do when implementing any other function in Julia. + +```julia +substitue_x(x::Symbol) = x == :x ? :(2*x) : x +substitue_x(e::Expr) = Expr(e.head, map(substitue_x, e.args)...) 
+substitue_x(u) = u +``` + +which works as promised. + +```julia +julia> e = :(2(3 + x) + 2(2 - x)) +:(2 * (3 + x) + 2 * (2 - x)) +julia> f = substitue_x(e) +:(2 * (3 + 2x) + 2 * (2 - 2x)) +``` + +or we can replace the `sin` function + +```julia +replace_sin(x::Symbol) = x == :sin ? :cos : x +replace_sin(e::Expr) = Expr(e.head, map(replace_sin, e.args)...) +replace_sin(u) = u +``` + +```julia +replace_sin(:(1 + sin(x))) +``` + +Sometimes, we want to operate on a block of code as opposed to single line expressions. Recall that a block of code is defined (quoted) with `quote ... end`. Let us see how `substitue_x` can handle the following example: + +```julia +e = quote + a = x + 3 + b = 2 - x + 2a + 2b +end +``` + +```julia +julia> substitue_x(e) |> Base.remove_linenums! +quote + a = 2x + 3 + b = 2 - 2x + 2a + 2b +end + +julia> substitue_x(e) |> eval +10 +``` + +### Brittleness of code manipulation + +When we are manipulating the AST or creating new expressions from scratch, there is no **syntactic** validation performed by the parser. It is therefore very easy to create an AST which does not make any sense and cannot be compiled. We have already seen that we can refer to variables that were not defined yet (this makes perfect sense). The same goes with functions (which also makes a lot of sense). + +```julia +e = :(g() + 5) +eval(e) +g() = 5 +eval(e) +``` + +But we can also introduce keywords which the language does not know. For example + +```julia +e = Expr(:my_keyword, 1, 2, 3) +:($(Expr(:my_keyword, 1, 2, 3))) + +julia> e.head +:my_keyword + +julia> e.args +3-element Vector{Any}: + 1 + 2 + 3 + +julia> eval(e) +ERROR: syntax: invalid syntax (my_keyword 1 2 3) +Stacktrace: + [1] top-level scope + @ none:1 + [2] eval + @ ./boot.jl:360 [inlined] + [3] eval(x::Expr) + @ Base.MainInclude ./client.jl:446 + [4] top-level scope + @ REPL[8]:1 +``` + +notice that the error is not related to an undefined variable / function, but to invalid syntax. 
This also demonstrates the role of `head` in `Expr`. More on Julia AST can be found in the developer [documentation](https://docs.julialang.org/en/v1/devdocs/ast/#Julia-ASTs). + +### Alternative way to look at code + +```julia +Meta.parse("x[3]") |> dump +``` + +We can see a new Symbol `ref` as a head and the position `3` of variable `x`. + +```julia +Meta.parse("(1,2,3)") |> dump +``` + +```julia +Meta.parse("1/2/3") |> dump +``` + +## Code generation + +### Using metaprogramming in inheritance by encapsulation + +Recall that Julia (at the moment) does not support inheritance, therefore the only way to adopt functionality of some object and extend it is through *encapsulation*. Assuming we have some object `T`, we wrap that object into a new structure. +Let's work out a concrete example, where we define the our own matrix. + +```julia +struct MyMatrix{T} <: AbstractMatrix{T} + x::Matrix{T} +end +``` + +Now, to make it useful, we should define all the usual methods, like `size`, `length`, `getindex`, `setindex!`, etc. We can list methods defined with `Matrix` as an argument `methodswith(Matrix)` (recall this will load methods that are defined with currently loaded libraries). Now, we would like to overload them. To minimize the written code, we can write + +```julia +import Base: setindex!, getindex, size, length +for f in [:setindex!, :getindex, :size, :length] + eval(:($(f)(A::MyMatrix, args...) = $(f)(A.x, args...))) +end +``` + +which we can verify now that it works as expected + +```julia +julia> a = MyMatrix([1 2 ; 3 4]) +2×2 MyMatrix{Int64}: + 1 2 + 3 4 + +julia> a[4] +4 + +julia> a[3] = 0 +0 + +julia> a +2×2 MyMatrix{Int64}: + 1 0 + 3 4 +``` + +In this way, Julia acts as its own preprocessor. +The above look can be equally written as + +```julia +for f in [:setindex!, :getindex, :size, :length] + println("$(f)(A::MyMatrix, args...) 
= $(f)(A.x, args...)") +end +``` + +```julia +for f in [:setindex!, :getindex, :size, :length] + s = "Base.$(f)(A::MyMatrix, args...) = $(f)(A.x, args...)" + println(s) + eval(Meta.parse(s)) +end + +for f in [:setindex!, :getindex, :size, :length] + @eval $(f)(A::MyMatrix, args...) = $(f)(A.x, args...) +end +``` + +Notice that we have just hand-implemented parts of `@forward` macro from [MacroTools](https://github.com/FluxML/MacroTools.jl/blob/master/src/examples/forward.jl), which does exactly this. + +--- + +# Resources +- [Introduction to Julia](https://www.youtube.com/watch?v=osdeT-tWjzk) by Jeff Bezanson on first JuliaCon +- Julia's manual on [metaprogramming](https://docs.julialang.org/en/v1/manual/metaprogramming/) +- David P. Sanders' [workshop @ JuliaCon 2021](https://www.youtube.com/watch?v=2QLhw6LVaq0) +- Steven Johnson's [keynote talk @ JuliaCon 2019](https://www.youtube.com/watch?v=mSgXWpvQEHE) +- James Nash's [Is Julia Aot or JIT @ JuliaCon 2017](https://www.youtube.com/watch?v=7KGZ_9D_DbI) +- Andy Ferris's [workshop @ JuliaCon 2018](https://www.youtube.com/watch?v=SeqAQHKLNj4) +- [From Macros to DSL](https://github.com/johnmyleswhite/julia_tutorials) by John Myles White +- Notes on [JuliaCompilerPlugin](https://hackmd.io/bVhb97Q4QTWeBQw8Rq4IFw?both#Julia-Compiler-Plugin-Project) diff --git a/docs_vitepress/src/lectures/lecture_06/parsed_fib.svg b/docs_vitepress/src/lectures/lecture_06/parsed_fib.svg new file mode 100644 index 00000000..67455229 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_06/parsed_fib.svg @@ -0,0 +1,376 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/Manifest.toml b/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/Manifest.toml new file mode 100644 index 00000000..87e3383a --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/Manifest.toml @@ -0,0 +1,229 @@ +# This file is machine-generated - editing it directly is not advised + +[[ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" + +[[Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[ChainRulesCore]] +deps = ["Compat", "LinearAlgebra", "SparseArrays"] +git-tree-sha1 = "f885e7e7c124f8c92650d61b9477b9ac2ee607dd" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "1.11.1" + +[[ChangesOfVariables]] +deps = ["LinearAlgebra", "Test"] +git-tree-sha1 = "9a1d594397670492219635b35a3d830b04730d62" +uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" +version = "0.1.1" + +[[Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "dce3e3fea680869eaa0b774b2e8343e9ff442313" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "3.40.0" + +[[DataAPI]] +git-tree-sha1 = "cc70b17275652eb47bc9e5f81635981f13cea5c8" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.9.0" + +[[DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "7d9d316f04214f7efdbb6398d545446e246eff02" +uuid = 
"864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.10" + +[[Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + +[[Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[DocStringExtensions]] +deps = ["LibGit2"] +git-tree-sha1 = "b19534d1895d702889b219c382a6e18010797f0b" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.8.6" + +[[Downloads]] +deps = ["ArgTools", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" + +[[EcosystemCore]] +deps = ["StatsBase"] +git-tree-sha1 = "234cff8809f0c32fde3d7ed7c20b66af4286db14" +repo-rev = "onlycore" +repo-url = "https://github.com/JuliaTeachingCTU/EcosystemCore.jl.git" +uuid = "3e0d8730-8ea0-4ee2-afe6-c85384c618a2" +version = "0.1.0" + +[[InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[InverseFunctions]] +deps = ["Test"] +git-tree-sha1 = "a7254c0acd8e62f1ac75ad24d5db43f5f19f3c65" +uuid = "3587e190-3f89-42d0-90ee-14403ec27112" +version = "0.1.2" + +[[IrrationalConstants]] +git-tree-sha1 = "7fd44fd4ff43fc60815f8e764c0f352b83c49151" +uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" +version = "0.1.1" + +[[LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" + +[[LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" + +[[LibGit2]] +deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" + +[[Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[LinearAlgebra]] +deps = ["Libdl"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[LogExpFunctions]] +deps = ["ChainRulesCore", 
"ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"] +git-tree-sha1 = "be9eef9f9d78cecb6f262f3c10da151a6c5ab827" +uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +version = "0.3.5" + +[[Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" + +[[Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "bf210ce90b6c9eed32d25dbcae1ebc565df2687f" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "1.0.2" + +[[Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" + +[[NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" + +[[OrderedCollections]] +git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.4.1" + +[[Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + +[[Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[Random]] +deps = ["Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[SortingAlgorithms]] +deps = ["DataStructures"] +git-tree-sha1 = "b3363d7460f7d098ca0912c69b082f75625d7508" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = 
"1.0.1" + +[[SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[StatsAPI]] +git-tree-sha1 = "1958272568dc176a1d881acb797beb909c785510" +uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" +version = "1.0.0" + +[[StatsBase]] +deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] +git-tree-sha1 = "eb35dcc66558b2dda84079b9a1be17557d32091a" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.33.12" + +[[TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" + +[[Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" + +[[Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" + +[[nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" + +[[p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" diff --git a/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/Project.toml b/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/Project.toml new file mode 100644 index 00000000..490ae7aa --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/Project.toml @@ -0,0 +1,8 @@ +name = "Ecosystem" +uuid = "e629da61-eb5c-4a46-88f0-e08c691183e3" +authors = ["Jan Francu "] +version = "0.1.0" + +[deps] +EcosystemCore = "3e0d8730-8ea0-4ee2-afe6-c85384c618a2" +StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" diff --git a/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/src/Ecosystem.jl 
b/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/src/Ecosystem.jl new file mode 100644 index 00000000..02d9145e --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/src/Ecosystem.jl @@ -0,0 +1,49 @@ +module Ecosystem + +using StatsBase +using EcosystemCore + +include("./ecosystem_macros.jl") +include("./ecosystem_agents.jl") + +export World +export agent_step!, agent_count, world_step!, simulate!, every_nth + +function simulate!(world::World, iters::Int; cb=()->()) + for i in 1:iters + world_step!(world) + cb() + end +end + +agent_count(p::Plant) = size(p)/EcosystemCore.max_size(p) +agent_count(::Animal) = 1 +agent_count(as::Vector{<:Agent}) = sum(agent_count,as) + +function agent_count(w::World) + function op(d::Dict,a::Agent{S}) where S<:Species + n = nameof(S) + if n in keys(d) + d[n] += agent_count(a) + else + d[n] = agent_count(a) + end + return d + end + foldl(op, w.agents |> values |> collect, init=Dict{Symbol,Real}()) +end + +function every_nth(f::Function, n::Int) + i = 1 + function callback(args...) + # display(i) # comment this out to see out the counter increases + if i == n + f(args...) 
+ i = 1 + else + i += 1 + end + end +end + +end diff --git a/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/src/ecosystem_agents.jl b/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/src/ecosystem_agents.jl new file mode 100644 index 00000000..2be3a789 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/src/ecosystem_agents.jl @@ -0,0 +1,57 @@ +### old definition + +#= +# animals +abstract type Sheep <: AnimalSpecies end +abstract type Wolf <: AnimalSpecies end + +# plants +abstract type Mushroom <: PlantSpecies end +abstract type Grass <: PlantSpecies end + +export Grass, Sheep, Wolf, Mushroom + +Base.show(io::IO, ::Type{Sheep}) = print(io,"🐑") +Base.show(io::IO, ::Type{Wolf}) = print(io,"🐺") + +Base.show(io::IO,::Type{Mushroom}) = print(io,"🍄") +Base.show(io::IO, ::Type{Grass}) = print(io,"🌿") + + +function EcosystemCore.eat!(s::Animal{Sheep}, m::Plant{Mushroom}, w::World) + if size(p)>0 + incr_energy!(s, -size(m)*Δenergy(s)) + m.size = 0 + end +end + +function EcosystemCore.eat!(a::Animal{Wolf}, b::Animal{Sheep}, w::World) + incr_energy!(a, energy(b)*Δenergy(a)) + kill_agent!(b,w) +end + +function EcosystemCore.eat!(a::Animal{Sheep}, b::Plant{Grass}, w::World) + incr_energy!(a, size(b)*Δenergy(a)) + b.size = 0 +end + +EcosystemCore.eats(::Animal{Sheep}, ::Plant{Mushroom}) = true +EcosystemCore.eats(::Animal{Sheep}, ::Plant{Grass}) = true +EcosystemCore.eats(::Animal{Wolf},::Animal{Sheep}) = true + +EcosystemCore.mates(::Animal{S,Female}, ::Animal{S,Male}) where S<:Species = true +EcosystemCore.mates(::Animal{S,Male}, ::Animal{S,Female}) where S<:Species = true +EcosystemCore.mates(a::Agent, b::Agent) = false +=# + +### new definition using macros from `ecosystem_macros.jl` +@plant Grass 🌿 +@plant Broccoli 🥦 +@plant Mushroom 🍄 +@animal Sheep 🐑 +@animal Wolf 🐺 +@animal Rabbit 🐇 + +@eats Rabbit [Grass => 0.5, Broccoli => 1.0] +@eats Sheep [Grass => 0.5, Broccoli => 1.0, Mushroom => -1.0] +@eats Wolf [Sheep => 0.9] \ No newline at end of 
file diff --git a/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/src/ecosystem_macros.jl b/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/src/ecosystem_macros.jl new file mode 100644 index 00000000..63c1cfc4 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_07/Ecosystem.jl/src/ecosystem_macros.jl @@ -0,0 +1,59 @@ +### species definition +macro species(typ, name, icon) + esc(_species(typ, name, icon)) +end + +function _species(typ, name, icon) + quote + abstract type $name <: $(typ == :Animal ? AnimalSpecies : PlantSpecies) end + Base.show(io::IO, ::Type{$name}) = print(io, $(QuoteNode(icon))) + export $name + end +end + +macro plant(name, icon) + return :(@species Plant $name $icon) +end + +macro animal(name, icon) + return :(@species Animal $name $icon) +end + +### eating behavior +macro eats(species::Symbol, foodlist::Expr) + return esc(_eats(species, foodlist)) +end + + +function _generate_eat(eater::Type{<:AnimalSpecies}, food::Type{<:PlantSpecies}, multiplier) + quote + EcosystemCore.eats(::Animal{$(eater)}, ::Plant{$(food)}) = true + function EcosystemCore.eat!(a::Animal{$(eater)}, p::Plant{$(food)}, w::World) + if size(p)>0 + incr_energy!(a, $(multiplier)*size(p)*Δenergy(a)) + p.size = 0 + end + end + end +end + +function _generate_eat(eater::Type{<:AnimalSpecies}, food::Type{<:AnimalSpecies}, multiplier) + quote + EcosystemCore.eats(::Animal{$(eater)}, ::Animal{$(food)}) = true + function EcosystemCore.eat!(ae::Animal{$(eater)}, af::Animal{$(food)}, w::World) + incr_energy!(ae, $(multiplier)*energy(af)*Δenergy(ae)) + kill_agent!(af, w) + end + end +end + +_parse_eats(ex) = Dict(arg.args[2] => arg.args[3] for arg in ex.args if arg.head == :call && arg.args[1] == :(=>)) + +function _eats(species, foodlist) + cfg = _parse_eats(foodlist) + code = Expr(:block) + for (k,v) in cfg + push!(code.args, _generate_eat(eval(species), eval(k), v)) + end + code +end \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_07/hw.md 
b/docs_vitepress/src/lectures/lecture_07/hw.md new file mode 100644 index 00000000..94c1af55 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_07/hw.md @@ -0,0 +1,50 @@ +# [Homework 7: Creating world in 3 days/steps](@id hw07) + +## How to submit +Put all the code inside `hw.jl`. Zip only this file (not its parent folder) and upload it to BRUTE. You should assume that only + +```julia +using Ecosystem +``` + +will be put before your file is executed, but do not include it in your solution. The version of the `Ecosystem` pkg should be the same as in [HW4](@ref hw4). + +::: danger Homework (2 points) + +Create a macro `@ecosystem` that should be able to define a world given a list of statements `@add # $species ${optional:sex}` +```julia +world = @ecosystem begin + @add 10 Sheep female # adds 10 female sheep + @add 2 Sheep male # adds 2 male sheep + @add 100 Grass # adds 100 pieces of grass + @add 3 Wolf # adds 3 wolves with random sex +end +``` +`@add` should not be treated as a macro, but rather just as a syntax, that can be easily matched. + +As this is not a small task let's break it into 3 steps. (These intermediate steps will also be checked in BRUTE.) + +1. Define method `default_config(::Type{T})` for each `T` in `Grass, Wolf,...`, which returns a named tuple of default parameters for that particular agent (you can choose the default values however you like). +2. Define method `_add_agents(max_id, count::Int, species::Type{<:Species})` and `_add_agents(max_id, count::Int, species::Type{<:AnimalSpecies}, sex::Sex)` that return an array of `count` agents of species `species` with `id` going from `max_id+1` to `max_id+count`. Default parameters should be constructed with `default_config`. Make sure you can handle even animals with random sex (`@add 3 Wolf`). +3. Define the underlying function `_ecosystem(ex)`, which parses the block expression and creates a piece of code that constructs the world. 
+ +You can test the macro (more precisely the `_ecosystem` function) with the following expression + +```julia +ex = :(begin + @add 10 Sheep female + @add 2 Sheep male + @add 100 Grass + @add 3 Wolf +end) +genex = _ecosystem(ex) +world = eval(genex) +``` + +::: + +::: details Show solution + +Nothing to see here + +::: \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_07/lab.md b/docs_vitepress/src/lectures/lecture_07/lab.md new file mode 100644 index 00000000..0eb4a919 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_07/lab.md @@ -0,0 +1,607 @@ +# [Lab 07: Macros](@id macro_lab) + +A little reminder from the [lecture](@ref macro_lecture), a macro in its essence is a function, which + +1. takes as an input an expression (parsed input) +2. modifies the expressions in arguments +3. inserts the modified expression at the same place as the one that is parsed. + +In this lab we are going to use what we have learned about manipulation of expressions and explore avenues of where macros can be useful + +- convenience (`@repeat`, `@show`) +- performance critical code generation (`@poly`) +- alleviate tedious code generation (`@species`, `@eats`) +- just as a syntactic sugar (`@ecosystem`) + +## Show macro + +Let's start with dissecting "simple" `@show` macro, which allows us to demonstrate advanced concepts of macros and expression manipulation. 
+ +```@repl lab07_show +x = 1 +@show x + 1 +let y = x + 1 # creates a temporary local variable + println("x + 1 = ", y) + y # show macro also returns the result +end + +# assignments should create the variable +@show x = 3 +let y = x = 2 + println("x = 2 = ", y) + y +end +x # should be equal to 2 +``` + +The original Julia's [implementation](https://github.com/JuliaLang/julia/blob/ae8452a9e0b973991c30f27beb2201db1b0ea0d3/base/show.jl#L946-L959) is not dissimilar to the following macro definition: + +```@example lab07_show +macro myshow(ex) + quote + println($(QuoteNode(ex)), " = ", repr(begin local value = $(esc(ex)) end)) + value + end +end +``` + +Testing it gives us the expected behavior + +```@repl lab07_show +@myshow xx = 1 + 1 +xx # should be defined +``` + +In this "simple" example, we had to use the following concepts mentioned already in the [lecture](@ref macro_lecture): + +- `QuoteNode(ex)` is used to wrap the expression inside another layer of quoting, such that when it is interpolated into `:()` it stays being a piece of code instead of the value it represents - [**TRUE QUOTING**](@ref lec7_quotation) +- `esc(ex)` is used in case that the expression contains an assignment, that has to be evaluated in the top level module `Main` (we are `esc`aping the local context) - [**ESCAPING**](@ref lec7_hygiene) +- `$(QuoteNode(ex))` and `$(esc(ex))` is used to evaluate an expression into another expression. 
[**INTERPOLATION**](@ref lec7_quotation) +- `local value = ` is used in order to return back the result after evaluation + +Lastly, let's mention that we can use `@macroexpand` to see how the code is manipulated in the `@myshow` macro + +```@repl lab07_show +@macroexpand @show x + 1 +``` + +## Repeat macro + +In the profiling/performance [labs](@ref perf_lab) we have sometimes needed to run some code multiple times in order to gather some samples and we have tediously written out simple for loops inside functions such as this + +```julia +function run_polynomial(n, a, x) + for _ in 1:n + polynomial(a, x) + end +end +``` + +We can remove this boilerplate code by creating a very simple macro that does this for us. + +::: warning Exercise + +Define macro `@repeat` that takes two arguments, first one being the number of times a code is to be run and the other being the actual code. + +```julia +julia> @repeat 3 println("Hello!") +Hello! +Hello! +Hello! +``` + +Before defining the macro, it is recommended to write the code manipulation functionality into a helper function `_repeat`, which helps in organization and debugging of macros. + +```julia +_repeat(3, :(println("Hello!"))) # testing "macro" without defining it +``` + +**HINTS**: +- use `$` interpolation into a for loop expression; for example given `ex = :(1+x)` we can interpolate it into another expression `:($ex + y)` -> `:(1 + x + y)` +- if unsure what gets interpolated use round brackets `:($(ex) + y)` +- macro is a function that *creates* code that does what we want + +**BONUS**: +What happens if we call `@repeat 3 x = 2`? Is `x` defined? 
+ +::: + +::: details Show Solution + + +```@repl lab07_repeat +macro repeat(n::Int, ex) + return _repeat(n, ex) +end + +function _repeat(n::Int, ex) + :(for _ in 1:$n + $ex + end) +end + +_repeat(3, :(println("Hello!"))) +@repeat 3 println("Hello!") +``` + +Even if we had used escaping the expression `x = 2` won't get evaluated properly due to the induced scope of the for loop. In order to resolve this we would have to specially match that kind of expression and generate a proper syntax withing the for loop `global $ex`. However we may just warn the user in the docstring that the usage is disallowed. + +::: + +Note that this kind of repeat macro is also defined in the [`Flux.jl`](https://fluxml.ai/) machine learning framework, wherein it's called `@epochs` and is used for creating training [loop](https://fluxml.ai/Flux.jl/stable/training/training/#Datasets). + +## [Polynomial macro](@id lab07_polymacro) + +This is probably the last time we are rewriting the `polynomial` function, though not quite in the same way. We have seen in the last [lab](@ref introspection_lab), that some optimizations occur automatically, when the compiler can infer the length of the coefficient array, however with macros we can *generate* optimized code directly (not on the same level - we are essentially preparing already unrolled/inlined code). + +Ideally we would like to write some macro `@poly` that takes a polynomial in a mathematical notation and spits out an anonymous function for its evaluation, where the loop is unrolled. + +*Example usage*: + +```julia +p = @poly x 3x^2+2x^1+10x^0 # the first argument being the independent variable to match +p(2) # return the value +``` + +However in order to make this happen, let's first consider much simpler case of creating the same but without the need for parsing the polynomial as a whole and employ the fact that macro can have multiple arguments separated by spaces. 
+ +```julia +p = @poly 3 2 10 +p(2) +``` + +::: warning Exercise + +Create macro `@poly` that takes multiple arguments and creates an anonymous function that constructs the unrolled code. Instead of directly defining the macro inside the macro body, create helper function `_poly` with the same signature that can be reused outside of it. + +Recall Horner's method polynomial evaluation from previous [labs](@ref horner): + +```julia +function polynomial(a, x) + accumulator = a[end] * one(x) + for i in length(a)-1:-1:1 + accumulator = accumulator * x + a[i] + #= accumulator = muladd(x, accumulator, a[i]) =# # equivalent + end + accumulator +end +``` + +**HINTS**: + +- you can use `muladd` function as replacement for `ac * x + a[i]` +- think of the `accumulator` variable as the mathematical expression that is incrementally built (try to write out the Horner's method[^1] to see it) +- you can nest expression arbitrarily +- the order of coefficients has different order than in previous labs (going from high powers of `x` last to them being first) +- use `evalpoly` to check the correctness + +```julia +using Test +p = @poly 3 2 10 +@test p(2) == evalpoly(2, [10,2,3]) # reversed coefficients +``` + +[^1]: Explanation of the Horner schema can be found on [https://en.wikipedia.org/wiki/Horner%27s\_method](https://en.wikipedia.org/wiki/Horner%27s_method). + +::: + +::: details Show Solution + +```@repl lab07_poly +using InteractiveUtils #hide +macro poly(a...) + return _poly(a...) +end + +function _poly(a...) + N = length(a) + ex = :($(a[1])) + for i in 2:N + ex = :(muladd(x, $ex, $(a[i]))) # equivalent of :(x * $ex + $(a[i])) + end + :(x -> $ex) +end + +p = @poly 3 2 10 +p(2) == evalpoly(2, [10,2,3]) +@code_lowered p(2) # can show the generated code +``` + +::: + +Moving on to the first/harder case, where we need to parse the mathematical expression. 
+ +::: warning Exercise + +Create macro `@poly` that takes two arguments first one being the independent variable and second one being the polynomial written in mathematical notation. As in the previous case this macro should define an anonymous function that constructs the unrolled code. + +```julia +julia> p = @poly x 3x^2+2x^1+10x^0 # the first argument being the independent variable to match +``` + +**HINTS**: + +- though in general we should be prepared for some edge cases, assume that we are really strict with the syntax allowed (e.g. we really require spelling out x^0, even though it is mathematically equivalent to `1`) +- reuse the `_poly` function from the previous exercise +- use the `MacroTools.jl` to match/capture `a_*$v^(n_)`, where `v` is the symbol of independent variable, this is going to be useful in the following steps + 1. get maximal rank of the polynomial + 2. get coefficient for each power + +::: + +::: tip `MacroTools.jl` + +Though not the most intuitive, [`MacroTools.jl`](https://fluxml.ai/MacroTools.jl/stable/) pkg help us with writing custom macros. We will use two utilities + +#### `@capture` + +This macro is used to match a pattern in a *single* expression and return values of particular spots. For example + +```julia +julia> using MacroTools +julia> @capture(:[1, 2, 3, 4, 5, 6, 7], [1, a_, 3, b__, c_]) +true + +julia> a, b, c +(2,[4,5,6],7) +``` + +#### `postwalk`/`prewalk` + +In order to extend `@capture` to more complicated expression trees, we can used either `postwalk` or `prewalk` to walk the AST and match expression along the way. 
For example + +```julia +julia> using MacroTools: prewalk, postwalk +julia> ex = quote + x = f(y, g(z)) + return h(x) + end + +julia> postwalk(ex) do x + @capture(x, fun_(arg_)) && println("Function: ", fun, " with argument: ", arg) + x + end; +Function: g with argument: z +Function: h with argument: x +``` + +Note that the `x` or the iteration is required, because by default postwalk/prewalk replaces currently read expression with the output of the body of `do` block. + +::: + +::: details Show Solution + + +```@example lab07_poly +using MacroTools +using MacroTools: postwalk, prewalk + +macro poly(v::Symbol, p::Expr) + a = Tuple(reverse(_get_coeffs(v, p))) + return _poly(a...) +end + +function _max_rank(v, p) + mr = 0 + postwalk(p) do x + if @capture(x, a_*$v^(n_)) + mr = max(mr, n) + end + x + end + mr +end + +function _get_coeffs(v, p) + N = _max_rank(v, p) + 1 + coefficients = zeros(N) + postwalk(p) do x + if @capture(x, a_*$v^(n_)) + coefficients[n+1] = a + end + x + end + coefficients +end +``` + +Let's test it. + +```@repl lab07_poly +p = @poly x 3x^2+2x^1+10x^0 +p(2) == evalpoly(2, [10,2,3]) +@code_lowered p(2) # can show the generated code +``` + +::: + +## Ecosystem macros + +There are at least two ways how we can make our life simpler when using our `Ecosystem` and `EcosystemCore` pkgs. 
Firstly, recall that in order to test our simulation we always had to write something like this: + +```julia +function create_world() + n_grass = 500 + regrowth_time = 17.0 + + n_sheep = 100 + Δenergy_sheep = 5.0 + sheep_reproduce = 0.5 + sheep_foodprob = 0.4 + + n_wolves = 8 + Δenergy_wolf = 17.0 + wolf_reproduce = 0.03 + wolf_foodprob = 0.02 + + gs = [Grass(id, regrowth_time) for id in 1:n_grass]; + ss = [Sheep(id, 2*Δenergy_sheep, Δenergy_sheep, sheep_reproduce, sheep_foodprob) for id in n_grass+1:n_grass+n_sheep]; + ws = [Wolf(id, 2*Δenergy_wolf, Δenergy_wolf, wolf_reproduce, wolf_foodprob) for id in n_grass+n_sheep+1:n_grass+n_sheep+n_wolves]; + World(vcat(gs, ss, ws)) +end +world = create_world(); +``` + +which includes the tedious process of defining the agent counts, their parameters and last but not least the unique id manipulation. As part of the [HW](@ref hw07) for this lecture you will be tasked to define a simple DSL, which can be used to define a world in a few lines. + +Secondly, the definition of a new `Animal` or `Plant`, that did not have any special behavior currently requires quite a bit of repetitive code. For example defining a new plant type `Broccoli` goes as follows + +```julia +abstract type Broccoli <: PlantSpecies end +Base.show(io::IO,::Type{Broccoli}) = print(io,"🥦") + +EcosystemCore.eats(::Animal{Sheep},::Plant{Broccoli}) = true +``` + +and definition of a new animal like a `Rabbit` looks very similar + +```julia +abstract type Rabbit <: AnimalSpecies end +Base.show(io::IO,::Type{Rabbit}) = print(io,"🐇") + +EcosystemCore.eats(::Animal{Rabbit},p::Plant{Grass}) = size(p) > 0 +EcosystemCore.eats(::Animal{Rabbit},p::Plant{Broccoli}) = size(p) > 0 +``` + +In order to make this code "clearer" (depends on your preference) we will create two macros, which can be called at one place to construct all the relations. 
+ +### New Animal/Plant definition + +Our goal is to be able to define new plants and animal species, while having a clear idea about their relations. For this we have proposed the following macros/syntax: + +```julia +@species Plant Broccoli 🥦 +@species Animal Rabbit 🐇 +@eats Rabbit [Grass => 0.5, Broccoli => 1.0, Mushroom => -1.0] +``` + +Unfortunately the current version of `Ecosystem` and `EcosystemCore`, already contains some definitions of species such as `Sheep`, `Wolf` and `Mushroom`, which may collide with definitions during prototyping, therefore we have created a modified version of those pkgs, which will be provided in the lab. + +::: tip Testing relations + +We can test the current definition with the following code that constructs "eating matrix" +```julia +using Ecosystem +using Ecosystem.EcosystemCore + +function eating_matrix() + _init(ps::Type{<:PlantSpecies}) = ps(1, 10.0) + _init(as::Type{<:AnimalSpecies}) = as(1, 10.0, 1.0, 0.8, 0.7) + function _check(s1, s2) + try + if s1 !== s2 + EcosystemCore.eats(_init(s1), _init(s2)) ? "✅" : "❌" + else + return "❌" + end + catch e + if e isa MethodError + return "❔" + else + throw(e) + end + end + end + + animal_species = subtypes(AnimalSpecies) + plant_species = subtypes(PlantSpecies) + species = vcat(animal_species, plant_species) + em = [_check(s, ss) for (s,ss) in Iterators.product(animal_species, species)] + string.(hcat(["🌍", animal_species...], vcat(permutedims(species), em))) +end +eating_matrix() + 🌍 🐑 🐺 🌿 🍄 + 🐑 ❌ ❌ ✅ ✅ + 🐺 ✅ ❌ ❌ ❌ +``` + +::: + +::: warning Exercise + +Based on the following example syntax, + +```julia +@species Plant Broccoli 🥦 +@species Animal Rabbit 🐇 +``` + +write macro `@species` inside `Ecosystem` pkg, which defines the abstract type, its show function and exports the type. 
For example `@species Plant Broccoli 🥦` should generate code: + +```julia +abstract type Broccoli <: PlantSpecies end +Base.show(io::IO,::Type{Broccoli}) = print(io,"🥦") +export Broccoli +``` + +Define first helper function `_species` to inspect the macro's output. This is indispensable, as we are defining new types/constants and thus we may otherwise encounter errors during repeated evaluation (though only if the type signature changed). + +```julia +_species(:Plant, :Broccoli, :🥦) +_species(:Animal, :Rabbit, :🐇) +``` + +**HINTS**: + +- use `QuoteNode` in the show function just like in the `@myshow` example +- escaping `esc` is needed for the returned in order to evaluate in the top most module (`Ecosystem`/`Main`) +- ideally these changes should be made inside the modified `Ecosystem` pkg provided in the lab (though not everything can be refreshed with `Revise`) - there is a file `ecosystem_macros.jl` just for this purpose +- multiple function definitions can be included into a `quote end` block +- interpolation works with any expression, e.g. `$(typ == :Animal ? AnimalSpecies : PlantSpecies)` + +**BONUS**: + +Based on `@species` define also macros `@animal` and `@plant` with two arguments instead of three, where the species type is implicitly carried in the macro's name. + +::: + +::: details Show Solution + +Macro `@species` + +```julia +macro species(typ, name, icon) + esc(_species(typ, name, icon)) +end + +function _species(typ, name, icon) + quote + abstract type $name <: $(typ == :Animal ? 
AnimalSpecies : PlantSpecies) end + Base.show(io::IO, ::Type{$name}) = print(io, $(QuoteNode(icon))) + export $name + end +end + +_species(:Plant, :Broccoli, :🥦) +_species(:Animal, :Rabbit, :🐇) +``` + +And the bonus macros `@plant` and `@animal` + +```julia +macro plant(name, icon) + return :(@species Plant $name $icon) +end + +macro animal(name, icon) + return :(@species Animal $name $icon) +end +``` + +::: + +The next exercise applies macros to the agents eating behavior. + +::: warning Exercise + +Define macro `@eats` inside `Ecosystem` pkg that assigns particular species their eating habits via `eat!` and `eats` functions. The macro should process the following example syntax + +```julia +@eats Rabbit [Grass => 0.5, Broccoli => 1.0], +``` + +where `Grass => 0.5` defines the behavior of the `eat!` function. The coefficient is used here as a multiplier for the energy balance, in other words the `Rabbit` should get only `0.5` of energy for a piece of `Grass`. + +**HINTS**: + +- ideally these changes should be made inside the modified `Ecosystem` pkg provided in the lab (though not everything can be refreshed with `Revise`) - there is a file `ecosystem_macros.jl` just for this purpose +- escaping `esc` is needed for the returned in order to evaluate in the top most module (`Ecosystem`/`Main`) +- you can create an empty `quote end` block with `code = Expr(:block)` and push new expressions into its `args` incrementally +- use dispatch to create specific code for the different combinations of agents eating other agents (there may be catch in that we have to first `eval` the symbols before calling in order to know if they are animals or plants) + +**BONUS**: + +You can try running the simulation with the newly added agents. 
+ +::: + +::: tip Reminder of `EcosystemCore` `eat!` and `eats` functionality + +In order to define that a `Wolf` eats `Sheep`, we have to define two methods + +```julia +EcosystemCore.eats(::Animal{Wolf}, ::Animal{Sheep}) = true + +function EcosystemCore.eat!(ae::Animal{Wolf}, af::Animal{Sheep}, w::World) + incr_energy!(ae, energy(af)*Δenergy(ae)) + kill_agent!(af, w) +end +``` + +In order to define that a `Sheep` eats `Grass`, we have to define two methods + +```julia +EcosystemCore.eats(::Animal{Sheep}, p::Plant{Grass}) = size(p)>0 + +function EcosystemCore.eat!(a::Animal{Sheep}, p::Plant{Grass}, w::World) + incr_energy!(a, size(p)*Δenergy(a)) + p.size = 0 +end +``` + +::: + +::: details Show Solution + +```julia +macro eats(species::Symbol, foodlist::Expr) + return esc(_eats(species, foodlist)) +end + + +function _generate_eat(eater::Type{<:AnimalSpecies}, food::Type{<:PlantSpecies}, multiplier) + quote + EcosystemCore.eats(::Animal{$(eater)}, p::Plant{$(food)}) = size(p)>0 + function EcosystemCore.eat!(a::Animal{$(eater)}, p::Plant{$(food)}, w::World) + incr_energy!(a, $(multiplier)*size(p)*Δenergy(a)) + p.size = 0 + end + end +end + +function _generate_eat(eater::Type{<:AnimalSpecies}, food::Type{<:AnimalSpecies}, multiplier) + quote + EcosystemCore.eats(::Animal{$(eater)}, ::Animal{$(food)}) = true + function EcosystemCore.eat!(ae::Animal{$(eater)}, af::Animal{$(food)}, w::World) + incr_energy!(ae, $(multiplier)*energy(af)*Δenergy(ae)) + kill_agent!(af, w) + end + end +end + +_parse_eats(ex) = Dict(arg.args[2] => arg.args[3] for arg in ex.args if arg.head == :call && arg.args[1] == :(=>)) + +function _eats(species, foodlist) + cfg = _parse_eats(foodlist) + code = Expr(:block) + for (k,v) in cfg + push!(code.args, _generate_eat(eval(species), eval(k), v)) + end + code +end + +species = :Rabbit +foodlist = :([Grass => 0.5, Broccoli => 1.0]) +_eats(species, foodlist) +``` + +::: + +--- +## Resources + +- macros in Julia 
[documentation](https://docs.julialang.org/en/v1/manual/metaprogramming/#man-macros) + +### `Type{T}` type selectors + +We have used `::Type{T}` signature[^2] at few places in the `Ecosystem` family of packages (and it will be helpful in the HW as well), such as in the `show` methods + +```julia +Base.show(io::IO,::Type{World}) = print(io,"🌍") +``` + +This particular example defines a method where the second argument is the `World` type itself and not an instance of a `World` type. As a result we are able to dispatch on specific types as values. + +Furthermore we can use subtyping operator to match all types in a hierarchy, e.g. `::Type{<:AnimalSpecies}` matches all animal species + +[^2]: [https://docs.julialang.org/en/v1/manual/types/#man-typet-type](https://docs.julialang.org/en/v1/manual/types/#man-typet-type) diff --git a/docs_vitepress/src/lectures/lecture_07/lecture.md b/docs_vitepress/src/lectures/lecture_07/lecture.md new file mode 100644 index 00000000..509f44e3 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_07/lecture.md @@ -0,0 +1,862 @@ +# [Macros](@id macro_lecture) + +What is macro? +In its essence, macro is a function, which + +1. takes as an input an expression (parsed input) +2. modify the expressions in argument +3. insert the modified expression at the same place as the one that is parsed. + +Macros are necessary because they execute after the code is parsed (2nd step in conversion of source code to binary as described in last lect, after `Meta.parse`) therefore, macros allow the programmer to generate and include fragments of customized code before the full program is compiled run. **Since they are executed during parsing, they do not have access to the values of their arguments, but only to their syntax**. + +To illustrate the difference, consider the following example: + +A very convenient and highly recommended ways to write macros is to write functions modifying the `Expr`ession and then call that function in the macro. 
Let's demonstrate on an example, where every occurrence of `sin` is replaced by `cos`. +We defined the function recursively traversing the AST and performing the substitution + +```julia +replace_sin(x::Symbol) = x == :sin ? :cos : x +replace_sin(e::Expr) = Expr(e.head, map(replace_sin, e.args)...) +replace_sin(u) = u +``` + +and then we define the macro + +```julia +macro replace_sin(ex) + replace_sin(esc(ex)) +end + +@replace_sin(cosp1(x) = 1 + sin(x)) +cosp1(1) == 1 + cos(1) +``` + +notice the following + +- the definition of the macro is similar to the definition of the function with the exception that instead of the keyword `function` we use keyword `macro` +- when calling the macro, we signal to the compiler our intention by prepending the name of the macro with `@`. +- the macro receives the expression(s) as the argument instead of the evaluated argument and also returns an expression that is placed on the position where the macro has been called +- when you are using macro, you should be as a user aware that the code you are entering can be arbitrarily modified and you can receive something completely different. This means that `@` should also serve as a warning that you are leaving Julia's syntax. In practice, it makes sense to make things akin to how they are done in Julia or to write Domain Specific Language with syntax familiar in that domain. +Inspecting the lowered code + +```julia +Meta.@lower @replace_sin( 1 + sin(x)) +``` + +We observe that there is no trace of macro in lowered code (compare to `Meta.@lower 1 + cos(x)`), which demonstrates that the macro has been expanded after the code has been parsed but before it has been lowered. In this sense macros are indispensable, as you cannot replace them simply by the combination of `Meta.parse` and `eval`. You might object that in the above example it is possible, which is true, but only because the effect of the macro is in the global scope. 
+ +```julia +ex = Meta.parse("cosp1(x) = 1 + sin(x)") +ex = replace_sin(ex) +eval(ex) +``` + +The following example cannot be achieved by the same trick, as the output of the macro modifies just the body of the function + +```julia +function cosp2(x) + @replace_sin 2 + sin(x) +end +cosp2(1) ≈ (2 + cos(1)) +``` + +This is not possible + +```julia +function parse_eval_cosp2(x) + ex = Meta.parse("2 + sin(x)") + ex = replace_sin(ex) + eval(ex) +end +``` + +as can be seen from + +```julia +julia> @code_lowered cosp2(1) +CodeInfo( +1 ─ %1 = Main.cos(x) +│ %2 = 2 + %1 +└── return %2 +) + +julia> @code_lowered parse_eval_cosp2(1) +CodeInfo( +1 ─ %1 = Base.getproperty(Main.Meta, :parse) +│ ex = (%1)("2 + sin(x)") +│ ex = Main.replace_sin(ex) +│ %4 = Main.eval(ex) +└── return %4 +) +``` + +::: tip Scope of eval + +`eval` function is always evaluated in the global scope of the `Module` in which the macro is called (note that there is that by default you operate in the `Main` module). Moreover, `eval` takes effect **after** the function has been has been executed. This can be demonstrated as + +```julia +add1(x) = x + 1 +function redefine_add(x) + eval(:(add1(x) = x - 1)) + add1(x) +end +julia> redefine_add(1) +2 + +julia> redefine_add(1) +0 +``` + +::: + +Macros are quite tricky to debug. Macro `@macroexpand` allows to observe the expansion of macros. Observe the effect as + +```julia +@macroexpand @replace_sin(cosp1(x) = 1 + sin(x)) +``` + +## What goes under the hood of macro expansion? + +Let's consider that the compiler is compiling + +```julia +function cosp2(x) + @replace_sin 2 + sin(x) +end +``` + +First, Julia parses the code into the AST as + +```julia +ex = Meta.parse(""" + function cosp2(x) + @replace_sin 2 + sin(x) +end +""") |> Base.remove_linenums! 
+dump(ex) +``` + +We observe that there is a macrocall in the AST, which means that Julia will expand the macro and put it in place + +```julia +ex.args[2].args[1].head # the location of the macrocall +ex.args[2].args[1].args[1] # which macro to call +ex.args[2].args[1].args[2] # line number +ex.args[2].args[1].args[3] # on which expression +``` + +We can manullay run `replace_sin` and insert it back on the relevant sub-part of the sub-tree + +```julia +ex.args[2].args[1] = replace_sin(ex.args[2].args[1].args[3]) +ex |> dump +``` + +now, `ex` contains the expanded macro and we can see that it correctly defines the function + +```julia +eval(ex) +``` + +## Calling macros + +Macros can be called without parentheses + +```julia +macro showarg(ex) + println("single argument version") + @show ex + ex +end +@showarg(1 + 1) +@showarg 1 + 1 +``` + +Macros use the very same multiple dispatch as functions, which allows to specialize macro calls + +```julia +macro showarg(x1, x2::Symbol) + println("two argument version, second is Symbol") + @show x1 + @show x2 + x1 +end + +macro showarg(x1, x2::Expr) + println("two argument version, second is Expr") + @show x1 + @show x2 + x1 +end + +@showarg(1 + 1, x) +@showarg(1 + 1, 1 + 3) +@showarg 1 + 1, 1 + 3 +@showarg 1 + 1 1 + 3 +``` + +(the `@showarg(1 + 1, :x)` raises an error, since `:(:x)` is of Type `QuoteNode`). + + +Observe that macro dispatch is based on the types of AST that are handed to the macro, not the types that the AST evaluates to at runtime. + +List of all defined versions of macro + +```julia +methods(var"@showarg") +``` + +## [Notes on quotation](@id lec7_quotation) + +In the previous lecture we have seen that we can *quote a block of code*, which tells the compiler to treat the input as a data and parse it. We have talked about three ways of quoting code. + +1. `:(quoted code)` +2. `Meta.parse(input_string)` +3. `quote ... 
end` + +The truth is that Julia does not do full quotation, but a *quasiquotation* as it allows you to **interpolate** expressions inside the quoted code using `$` symbol similar to the string. This is handy, as sometimes, when we want to insert into the quoted code an result of some computation / preprocessing. +Observe the following difference in returned code + +```julia +a = 5 +:(x = a) +:(x = $(a)) +let y = :x + :(1 + y), :(1 + $y) +end +``` + +In contrast to the behavior of `:()` (or `quote ... end`, true quotation would not perform interpolation where unary `$` occurs. Instead, we would capture the syntax that describes interpolation and produce something like the following: + +```julia +( + :(1 + x), # Quasiquotation + Expr(:call, :+, 1, Expr(:$, :x)), # True quotation +) +``` + +```julia +for (v, f) in [(:sin, :foo_sin)] + quote + $(f)(x) = $(v)(x) + end |> Base.remove_linenums! |> dump +end +``` + +When we need true quoting, i.e. we need something to stay quoted, we can use `QuoteNode` as + +```julia +macro true_quote(e) + QuoteNode(e) +end + +let y = :x + ( + @true_quote(1 + $y), + :(1 + $y), + ) +end +``` + +At first glance, `QuoteNode` wrapper seems to be useless. But `QuoteNode` has clear value when it's used inside a macro to indicate that something should stay quoted even after the macro finishes its work. Also notice that the expression received by macro are quoted, not quasiquoted, since in the latter case `$y` would be replaced. + +We can demonstrate it by defining a new macro `no_quote` which will just return the expression as is + +```julia +macro no_quote(ex) + ex +end + +let y = :x + @no_quote(1 + $y) +end +``` + +The error code snippet errors telling us that the expression `"$"` is outside of a quote block. This is because the macro `@no_quote` has returned a block with `$` occuring outside of `quote` or string definition. 
+ +::: tip + +Some macros like `@eval` (recall last example) + +```julia +for f in [:setindex!, :getindex, :size, :length] + @eval $(f)(A::MyMatrix, args...) = $(f)(A.x, args...) +end +``` + +or `@benchmark` support interpolation of values. This interpolation needs to be handled by the logic of the macro and is not automatically handled by the Julia language. + +::: + +Macros do not know about runtime values, they only know about syntax trees. When a macro receives an expression with a `$x` in it, it can't interpolate the value of x into the syntax tree because it reads the syntax tree before `x` ever has a value! + +Instead, when a macro is given an expression with `$` in it, it assumes you're going to give your own meaning to `$x`. In the case of BenchmarkTools.jl they return code that has to wait until runtime to receive the value of `x` and then splice that value into an expression which is evaluated and benchmarked. Nowhere in the actual body of the macro do they have access to the value of `x` though. + + +::: tip Why `$` for interpolation? + +The `$` symbol for interpolation was used as it identifies the interpolation inside the string and inside the command. For example + +```julia +a = 5 +s = "a = $(a)" +typeof(s) +println(s) +filename = "/tmp/test_of_interpolation" +run(`touch $(filename)`) +``` + +::: + +## [Macro hygiene](@id lec7_hygiene) + +Macro hygiene is a term coined in 1986 addressing the following problem: if you're automatically generating code, it's possible that you will introduce variable names in your generated code that will clash with existing variable names in the scope in which a macro is called. These clashes might cause your generated code to read from or write to variables that you should not be interacting with. A macro is hygienic when it does not interact with existing variables, which means that when the macro is evaluated, it should not have any effect on the surrounding code. 
+ +By default, all macros in Julia are hygienic which means that variables introduced in the macro have automatically generated names, where Julia ensures they will not collide with user's variable. These variables are created by `gensym` function / macro. + +::: tip gensym + +`gensym([tag])` Generates a symbol which will not conflict with other variable names. + +```julia +julia> gensym("hello") +Symbol("##hello#257") +``` + +::: + +Let's demonstrate it on our own version of an macro `@elapsed` which will return the time that was needed to evaluate the block of code. + +```julia +macro tooclean_elapsed(ex) + quote + tstart = time() + $(ex) + time() - tstart + end +end + +fib(n) = n <= 1 ? n : fib(n-1) + fib(n - 2) +let + tstart = "should not change the value and type" + t = @tooclean_elapsed r = fib(10) + println("the evaluation of fib took ", t, "s and result is ", r) + @show tstart +end +``` + +We see that variable `r` has not been assigned during the evaluation of macro. We have also used `let` block in orders not to define any variables in the global scope. The problem with the above is that it cannot be nested. +Why is that? +Let's observe how the macro was expanded + +```julia +julia> Base.remove_linenums!(@macroexpand @tooclean_elapsed r = fib(10)) +quote + var"#12#tstart" = Main.time() + var"#13#r" = Main.fib(10) + Main.time() - var"#12#tstart" +end +``` + +We see that `tstart` in the macro definition was replaced by `var"#12#tstart"`, which is a name generated by Julia's gensym to prevent conflict. The same happens to `r`, which was replaced by `var"#13#r"`. This names are the result of Julia's hygiene-enforcing pass, which is intended to prevent us from overwriting existing variables during macro expansion. This pass usually makes our macros safer, but it is also a source of confusion because it introduces a gap between the expressions we generate and the expressions that end up in the resulting source code. 
Notice that in the case of `tstart`, we actually wanted to replace `tstart` with a unique name, such that if we by a bad luck define `tstart` in our code, it would not be affected, as we can see in this example. + +```julia +let + tstart = "should not change the value and type " + t = @tooclean_elapsed r = fib(10) + println(tstart, " ", typeof(tstart)) +end +``` + +But in the second case, we would actually very much like the variable `r` to retain its name, such that we can accesss the results (and also, `ex` can access and change other local variables). Julia offer a way to `escape` from the hygienic mode, which means that the variables will be used and passed as-is. Notice the effect if we escape just the expression `ex` + +```julia +macro justright_elapsed(ex) + quote + tstart = time() + $(esc(ex)) + time() - tstart + end +end + +let + tstart = "should not change the value and type " + t = @justright_elapsed r = fib(10) + println("the evaluation of fib took ", t, "s and result is ", r) + println(tstart, " ", typeof(tstart)) +end +``` + +which now works as intended. We can inspect the output again using `@macroexpand` + +```julia +julia> Base.remove_linenums!(@macroexpand @justright_elapsed r = fib(10)) +quote + var"#19#tstart" = Main.time() + r = fib(10) + Main.time() - var"#19#tstart" +end +``` + +and compare it to `Base.remove_linenums!(@macroexpand @justright_elapsed r = fib(10))`. We see that the expression `ex` has its symbols intact. To use the escaping / hygience correctly, you need to have a good understanding how the macro evaluation works and what is needed. 
Let's now try the third version of the macro, where we escape everything as + +```julia +macro toodirty_elapsed(ex) + ex = quote + tstart = time() + $(ex) + time() - tstart + end + esc(ex) +end + +let + tstart = "should not change the value and type " + t = @toodirty_elapsed r = fib(10) + println("the evaluation of fib took ", t, "s and result is ", r) + println(tstart, " ", typeof(tstart)) +end +``` + +Using `@macroexpand` we observe that `@toodirty_elapsed` does not have any trace of hygiene. + +```julia +julia> Base.remove_linenums!(@macroexpand @toodirty_elapsed r = fib(10)) +quote + tstart = time() + r = fib(10) + time() - tstart +end +``` + +From the above we can also see that hygiene-pass occurs after the macro has been applied but before the code is lowered. `esc` is inserted to AST as a special node `Expr(:escape,...),` which can be seen from the follows. + +```julia +julia> esc(:x) +:($(Expr(:escape, :x))) +``` + +The definition in `essentials.jl:480` is pretty simple as `esc(@nospecialize(e)) = Expr(:escape, e)`, but it does not tell anything about the actual implementation, which is hidden probably in the macro-expanding logic. + +With that in mind, we can now understand our original example with `@replace_sin`. Recall that we have defined it as + +```julia +macro replace_sin(ex) + replace_sin(esc(ex)) +end +``` + +where the escaping `replace_sin(esc(ex))` in communicates to compiler that `ex` should be used as without hygienating the `ex`. Indeed, if we lower it + +```julia +function cosp2(x) + @replace_sin 2 + sin(x) +end + +julia> @code_lowered(cosp2(1.0)) +CodeInfo( +1 ─ %1 = Main.cos(x) +│ %2 = 2 + %1 +└── return %2 +) +``` + +we see it works as intended. 
Whereas if we use hygienic version + +```julia +macro hygienic_replace_sin(ex) + replace_sin(ex) +end + +function hcosp2(x) + @hygienic_replace_sin 2 + sin(x) +end + +julia> @code_lowered(hcosp2(1.0)) +CodeInfo( +1 ─ %1 = Main.cos(Main.x) +│ %2 = 2 + %1 +└── return %2 +) +``` + +### Why hygienating the function calls? + +```julia +function foo(x) + cos(x) = exp(x) + @replace_sin 1 + sin(x) +end + +foo(1.0) ≈ 1 + exp(1.0) + +function foo2(x) + cos(x) = exp(x) + @hygienic_replace_sin 1 + sin(x) +end + +x = 1.0 +foo2(1.0) ≈ 1 + cos(1.0) +``` + +### Can I do the hygiene by myself? + +Yes, it is by some considered to be much simpler (and safer) then to understand, how macro hygiene works. + +```julia +macro manual_elapsed(ex) + x = gensym() + esc(quote + $(x) = time() + $(ex) + time() - $(x) + end + ) +end + +let + t = @manual_elapsed r = fib(10) + println("the evaluation of fib took ", t, "s and result is ", r) +end +``` + +## How macros compose? + +```julia +macro m1(ex) + println("m1: ") + dump(ex) + ex +end + +macro m2(ex) + println("m2: ") + dump(ex) + esc(ex) +end + +@m1 @m2 1 + sin(1) +``` + +which means that macros are expanded in the order from the outer most to inner most, which is exactly the other way around than functions. + +```julia +@macroexpand @m1 @m2 1 + sin(1) +``` + +also notice that the escaping is only partial (running `@macroexpand @m2 @m1 1 + sin(1)` would not change the results). + +## Write @exfiltrate macro + +Since Julia's debugger is a complicated story, people have been looking for tools, which would simplify the debugging. One of them is a macro `@exfiltrate`, which copies all variables in a given scope to a safe place, from where they can be collected later on. This helps you in evaluating the function. F + +Whyle a full implementation is provided in package [`Infiltrator.jl`](https://github.com/JuliaDebug/Infiltrator.jl), we can implement such functionality by outselves. 
+ +- We collect names and values of variables in a given scope using the macro `Base.@locals` +- We store variables in some global variable in some module, such that we have one place from which we can retrieve them and we are certain that this storage would not interact with any existing code. +- If the `@exfiltrate` should be easy, ideally called without parameters, it has to be implemented as a macro to supply the relevant variables to be stored. + +```julia +module Exfiltrator + +const environment = Dict{Symbol, Any}() + +function copy_variables!(d::Dict) + foreach(k -> delete!(environment, k), keys(environment)) + for (k, v) in d + environment[k] = v + end +end + +macro exfiltrate() + v = gensym(:vars) + quote + $(v) = $(esc((Expr(:locals)))) + copy_variables!($(v)) + end +end + +end +``` + +Test it to + +```julia +using Main.Exfiltrator: @exfiltrate +let + x,y,z = 1,"hello", (a = "1", b = "b") + @exfiltrate +end + +Exfiltrator.environment + +function inside_function() + a,b,c = 1,2,3 + @exfiltrate +end + +inside_function() + +Exfiltrator.environment + +function a() + a = 1 + @exfiltrate +end + +function b() + b = 1 + a() +end +function c() + c = 1 + b() +end + +c() +Exfiltrator.environment +``` + +## Domain Specific Languages (DSL) + +Macros are convenient for writing domain specific languages, which are languages designed for specific domain. This allows them to simplify notation and / or make the notation familiar for people working in the field. For example in `Turing.jl`, the model +of coinflips can be specified as + +```julia +@model function coinflip(y) + + # Our prior belief about the probability of heads in a coin. + p ~ Beta(1, 1) + + # The number of observations. + N = length(y) + for n in 1:N + # Heads or tails of a coin are drawn from a Bernoulli distribution. + y[n] ~ Bernoulli(p) + end +end; +``` + +which resembles, but not copy Julia's syntax due to the use of `~`. 
A similar DSLs can be seen in `ModelingToolkit.jl` for differential equations, in `Soss.jl` again for expressing probability problems, in `Metatheory.jl` / `SymbolicUtils.jl` for defining rules on elements of algebras, or `JuMP.jl` for specific mathematical programs. + +One of the reasons for popularity of DSLs is that macro system is very helpful in their implementation, but it also contraints the DSL, as it has to be parseable by Julia's parser. This is a tremendous helps, because one does not have to care about how to parse numbers, strings, parenthesess, functions, etc. (recall the last lecture about replacing occurences of `i` variable). + +Let's jump into the first example adapted from [John Myles White's howto](https://github.com/johnmyleswhite/julia_tutorials/blob/master/From%20Macros%20to%20DSLs%20in%20Julia%20-%20Part%202%20-%20DSLs.ipynb). +We would like to write a macro, which allows us to define graph in `Graphs.jl` just by defining edges. + +```julia +@graph begin + 1 -> 2 + 2 -> 3 + 3 -> 1 +end +``` + +The above should expand to + +```julia +using Graphs +g = DiGraph(3) +add_edge!(g, 1,2) +add_edge!(g, 2,3) +add_edge!(g, 3,1) +g +``` + +Let's start with easy and observe, how + +```julia +ex = Meta.parse(""" +begin + 1 -> 2 + 2 -> 3 + 3 -> 1 +end +""") +ex = Base.remove_linenums!(ex) +``` + +is parsed to + +```julia +quote + 1->begin + 2 + end + 2->begin + 3 + end + 3->begin + 1 + end +end +``` + +We see that + +- the sequence of statements is parsed to `block` (we know that from last lecture). +- `->` is parsed to `->`, i.e. `ex.args[1].head == :->` with parameters being the first vertex `ex.args[1].args[1] == 1` and the second vertex is quoted to `ex.args[1].args[2].head == :block`. + +The main job will be done in the function `parse_edge`, which will parse one edge. 
It will check that the node defines edge (otherwise, it will return nothing, which will be filtered out) + +```julia +function parse_edge(ex) + #checking the syntax + !hasproperty(ex, :head) && return(nothing) + !hasproperty(ex, :args) && return(nothing) + ex.head != :-> && return(nothing) + length(ex.args) != 2 && return(nothing) + !hasproperty(ex.args[2], :head) && return(nothing) + ex.args[2].head != :block && length(ex.args[2].args) == 1 && return(nothing) + + #ready to go + src = ex.args[1] + @assert src isa Integer + dst = ex.args[2].args[1] + @assert dst isa Integer + :(add_edge!(g, $(src), $(dst))) +end + +function parse_graph(ex) + @assert ex.head == :block + ex = Base.remove_linenums!(ex) + edges = filter(!isnothing, parse_edge.(ex.args)) + n = maximum(e -> maximum(e.args[3:4]), edges) + quote + g = Graphs.DiGraph($(n)) + $(edges...) + g + end +end +``` + +Once we have the first version, let's make everything hygienic + +```julia +function parse_edge(g, ex::Expr) + #checking the syntax + ex.head != :-> && return(nothing) + length(ex.args) != 2 && return(nothing) + !hasproperty(ex.args[2], :head) && return(nothing) + ex.args[2].head != :block && length(ex.args[2].args) == 1 && return(nothing) + + #ready to go + src = ex.args[1] + @assert src isa Integer + dst = ex.args[2].args[1] + @assert dst isa Integer + :(add_edge!($(g), $(src), $(dst))) +end +parse_edge(g, ex) = nothing + +function parse_graph(ex) + @assert ex.head == :block + g = gensym(:graph) + ex = Base.remove_linenums!(ex) + edges = filter(!isnothing, parse_edge.(g, ex.args)) + n = maximum(e -> maximum(e.args[3:4]), edges) + quote + $(g) = Graphs.DiGraph($(n)) + $(edges...) + $(g) + end +end +``` + +and we are ready to go + +```julia +macro graph(ex) + parse_graph(ex) +end + +@graph begin + 1 -> 2 + 2 -> 3 + 3 -> 1 +end +``` + +and we can check the output with `@macroexpand`. 
+ +```julia +julia> @macroexpand @graph begin + 1 -> 2 + 2 -> 3 + 3 -> 1 + end +quote + #= REPL[173]:8 =# + var"#27###graph#273" = (Main.Graphs).DiGraph(3) + #= REPL[173]:9 =# + Main.add_edge!(var"#27###graph#273", 1, 2) + Main.add_edge!(var"#27###graph#273", 2, 3) + Main.add_edge!(var"#27###graph#273", 3, 1) + #= REPL[173]:10 =# + var"#27###graph#273" +end +``` + +## non-standard string literals + +Julia allows to customize parsing of strings. For example we can define regexp matcher as +`r"^\s*(?:#|$)"`, i.e. using the usual string notation prepended by the string `r`. + +You can define these "parsers" by yourself using the macro definition with suffix `_str` + +```julia +macro debug_str(p) + @show p + p +end +``` + +by invoking it + +```julia +debug"hello" +``` + +we see that the string macro receives string as an argument. + +Why are they useful? Sometimes, we want to use syntax which is not compatible with Julia's parser. For example `IntervalArithmetics.jl` allows to define an interval open only from one side, for example `[a, b)`, which is something that Julia's parser would not like much. String macro solves this problem by letting you to write the parser by your own. + +```julia +struct Interval{T} + left::T + right::T + left_open::Bool + right_open::Bool +end + +function Interval(s::String) + s[1] == '(' || s[1] == '[' || error("left nterval can be only [,(") + s[end] == ')' || s[end] == ']' || error("left nterval can be only ],)") + left_open = s[1] == '(' ? true : false + right_open = s[end] == ')' ? true : false + ss = parse.(Float64, split(s[2:end-1],",")) + length(ss) != 2 && error("interval should have two numbers separated by ','") + Interval(ss..., left_open, right_open) +end + +function Base.show(io::IO, r::Interval) + lb = r.left_open ? "(" : "[" + rb = r.right_open ? ")" : "]" + print(io, lb,r.left,",",r.right,rb) +end +``` + +We can check it does the job by trying `Interval("[1,2)")`. 
+Finally, we define a string macro as + +```julia +macro int_str(s) + Interval(s) +end +``` + +which allows us to define interval as `int"[1,2)"`. + +## Sources + +- Great discussion on [evaluation of macros](https://discourse.julialang.org/t/interpolation-in-macro-calls/25530). diff --git a/docs_vitepress/src/lectures/lecture_07/macros.md b/docs_vitepress/src/lectures/lecture_07/macros.md new file mode 100644 index 00000000..19921943 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_07/macros.md @@ -0,0 +1 @@ +# Macros diff --git a/docs_vitepress/src/lectures/lecture_08/Makefile b/docs_vitepress/src/lectures/lecture_08/Makefile new file mode 100644 index 00000000..12acd541 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/Makefile @@ -0,0 +1,10 @@ +graphs: + pdflatex graphdiff_6.tex + pdflatex graphdiff_7.tex + pdflatex graphdiff_9.tex + pdflatex graphdiff_14.tex + pdf2svg graphdiff_6.pdf graphdiff_6.svg + pdf2svg graphdiff_7.pdf graphdiff_7.svg + pdf2svg graphdiff_9.pdf graphdiff_9.svg + pdf2svg graphdiff_14.pdf graphdiff_14.svg + rm graphdiff_6.pdf graphdiff_7.pdf graphdiff_9.pdf graphdiff_14.pdf diff --git a/docs_vitepress/src/lectures/lecture_08/Manifest.toml b/docs_vitepress/src/lectures/lecture_08/Manifest.toml new file mode 100644 index 00000000..e5cb4300 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/Manifest.toml @@ -0,0 +1,1063 @@ +# This file is machine-generated - editing it directly is not advised + +[[AbstractFFTs]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "485ee0867925449198280d4af84bdb46a2a404d0" +uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" +version = "1.0.1" + +[[AbstractTrees]] +git-tree-sha1 = "03e0550477d86222521d254b741d470ba17ea0b5" +uuid = "1520ce14-60c1-5f80-bbc7-55ef81b5835c" +version = "0.3.4" + +[[Adapt]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "84918055d15b3114ede17ac6a7182f68870c16f7" +uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +version = "3.3.1" + +[[ArgTools]] +uuid = 
"0dad84c5-d112-42e6-8d28-ef12dabb789f" + +[[ArnoldiMethod]] +deps = ["LinearAlgebra", "Random", "StaticArrays"] +git-tree-sha1 = "f87e559f87a45bece9c9ed97458d3afe98b1ebb9" +uuid = "ec485272-7323-5ecc-a04f-4719b315124d" +version = "0.1.0" + +[[Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[AxisAlgorithms]] +deps = ["LinearAlgebra", "Random", "SparseArrays", "WoodburyMatrices"] +git-tree-sha1 = "66771c8d21c8ff5e3a93379480a2307ac36863f7" +uuid = "13072b0f-2c55-5437-9ae7-d433b7a33950" +version = "1.0.1" + +[[Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[Bzip2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2" +uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" +version = "1.0.8+0" + +[[Cairo_jll]] +deps = ["Artifacts", "Bzip2_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "JLLWrappers", "LZO_jll", "Libdl", "Pixman_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "f2202b55d816427cd385a9a4f3ffb226bee80f99" +uuid = "83423d85-b0ee-5818-9007-b63ccbeb887a" +version = "1.16.1+0" + +[[ChainRules]] +deps = ["ChainRulesCore", "Compat", "LinearAlgebra", "Random", "RealDot", "Statistics"] +git-tree-sha1 = "035ef8a5382a614b2d8e3091b6fdbb1c2b050e11" +uuid = "082447d4-558c-5d27-93f4-14fc19e9eca2" +version = "1.12.1" + +[[ChainRulesCore]] +deps = ["Compat", "LinearAlgebra", "SparseArrays"] +git-tree-sha1 = "f885e7e7c124f8c92650d61b9477b9ac2ee607dd" +uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +version = "1.11.1" + +[[ChangesOfVariables]] +deps = ["LinearAlgebra", "Test"] +git-tree-sha1 = "9a1d594397670492219635b35a3d830b04730d62" +uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" +version = "0.1.1" + +[[ColorSchemes]] +deps = ["ColorTypes", "Colors", "FixedPointNumbers", "Random"] +git-tree-sha1 = "a851fec56cb73cfdf43762999ec72eff5b86882a" +uuid = "35d6a980-a343-548e-a6ea-1d62b119f2f4" +version = "3.15.0" + +[[ColorTypes]] +deps = 
["FixedPointNumbers", "Random"] +git-tree-sha1 = "32a2b8af383f11cbb65803883837a149d10dfe8a" +uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" +version = "0.10.12" + +[[Colors]] +deps = ["ColorTypes", "FixedPointNumbers", "Reexport"] +git-tree-sha1 = "417b0ed7b8b838aa6ca0a87aadf1bb9eb111ce40" +uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" +version = "0.12.8" + +[[CommonSubexpressions]] +deps = ["MacroTools", "Test"] +git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" +uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" +version = "0.3.0" + +[[Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "SHA", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "dce3e3fea680869eaa0b774b2e8343e9ff442313" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "3.40.0" + +[[CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" + +[[Contour]] +deps = ["StaticArrays"] +git-tree-sha1 = "9f02045d934dc030edad45944ea80dbd1f0ebea7" +uuid = "d38c429a-6771-53c6-b99e-75d170b6e991" +version = "0.5.7" + +[[DataAPI]] +git-tree-sha1 = "cc70b17275652eb47bc9e5f81635981f13cea5c8" +uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" +version = "1.9.0" + +[[DataStructures]] +deps = ["Compat", "InteractiveUtils", "OrderedCollections"] +git-tree-sha1 = "7d9d316f04214f7efdbb6398d545446e246eff02" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.18.10" + +[[DataValueInterfaces]] +git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" +uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" +version = "1.0.0" + +[[Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + +[[DiffResults]] +deps = ["StaticArrays"] +git-tree-sha1 = 
"c18e98cba888c6c25d1c3b048e4b3380ca956805" +uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" +version = "1.0.3" + +[[DiffRules]] +deps = ["LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] +git-tree-sha1 = "3287dacf67c3652d3fed09f4c12c187ae4dbb89a" +uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" +version = "1.4.0" + +[[Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[DocStringExtensions]] +deps = ["LibGit2"] +git-tree-sha1 = "b19534d1895d702889b219c382a6e18010797f0b" +uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" +version = "0.8.6" + +[[Downloads]] +deps = ["ArgTools", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" + +[[EarCut_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "3f3a2501fa7236e9b911e0f7a588c657e822bb6d" +uuid = "5ae413db-bbd1-5e63-b57d-d24a61df00f5" +version = "2.2.3+0" + +[[Expat_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "b3bfd02e98aedfa5cf885665493c5598c350cd2f" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.2.10+0" + +[[FFMPEG]] +deps = ["FFMPEG_jll"] +git-tree-sha1 = "b57e3acbe22f8484b4b5ff66a7499717fe1a9cc8" +uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a" +version = "0.4.1" + +[[FFMPEG_jll]] +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "JLLWrappers", "LAME_jll", "Libdl", "Ogg_jll", "OpenSSL_jll", "Opus_jll", "Pkg", "Zlib_jll", "libass_jll", "libfdk_aac_jll", "libvorbis_jll", "x264_jll", "x265_jll"] +git-tree-sha1 = "d8a578692e3077ac998b50c0217dfd67f21d1e5f" +uuid = "b22a6f82-2f65-5046-a5b2-351ab43fb4e5" +version = "4.4.0+0" + +[[FillArrays]] +deps = ["LinearAlgebra", "Random", "SparseArrays", "Statistics"] +git-tree-sha1 = "8756f9935b7ccc9064c6eef0bff0ad643df733a3" +uuid = "1a297f60-69ca-5386-bcde-b61e274b549b" +version = "0.12.7" + +[[FiniteDifferences]] +deps = ["ChainRulesCore", "LinearAlgebra", "Printf", "Random", "Richardson", "StaticArrays"] 
+git-tree-sha1 = "c56a261e1a5472f20cbd7aa218840fd203243319" +uuid = "26cc04aa-876d-5657-8c51-4c34ba976000" +version = "0.12.19" + +[[FixedPointNumbers]] +deps = ["Statistics"] +git-tree-sha1 = "335bfdceacc84c5cdf16aadc768aa5ddfc5383cc" +uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" +version = "0.8.4" + +[[Fontconfig_jll]] +deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "FreeType2_jll", "JLLWrappers", "Libdl", "Libuuid_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "21efd19106a55620a188615da6d3d06cd7f6ee03" +uuid = "a3f928ae-7b40-5064-980b-68af3947d34b" +version = "2.13.93+0" + +[[Formatting]] +deps = ["Printf"] +git-tree-sha1 = "8339d61043228fdd3eb658d86c926cb282ae72a8" +uuid = "59287772-0a20-5a39-b81b-1366585eb4c0" +version = "0.4.2" + +[[ForwardDiff]] +deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions", "StaticArrays"] +git-tree-sha1 = "6406b5112809c08b1baa5703ad274e1dded0652f" +uuid = "f6369f11-7733-5829-9624-2563aa707210" +version = "0.10.23" + +[[FreeType2_jll]] +deps = ["Artifacts", "Bzip2_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "87eb71354d8ec1a96d4a7636bd57a7347dde3ef9" +uuid = "d7e528f0-a631-5988-bf34-fe36492bcfd7" +version = "2.10.4+0" + +[[FriBidi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "aa31987c2ba8704e23c6c8ba8a4f769d5d7e4f91" +uuid = "559328eb-81f9-559d-9380-de523a88c83c" +version = "1.0.10+0" + +[[GLFW_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libglvnd_jll", "Pkg", "Xorg_libXcursor_jll", "Xorg_libXi_jll", "Xorg_libXinerama_jll", "Xorg_libXrandr_jll"] +git-tree-sha1 = "0c603255764a1fa0b61752d2bec14cfbd18f7fe8" +uuid = "0656b61e-2033-5cc2-a64a-77c0f6c09b89" +version = "3.3.5+1" + +[[GR]] +deps = ["Base64", "DelimitedFiles", "GR_jll", "HTTP", "JSON", "Libdl", "LinearAlgebra", "Pkg", "Printf", "Random", "Serialization", "Sockets", "Test", "UUIDs"] +git-tree-sha1 = 
"30f2b340c2fff8410d89bfcdc9c0a6dd661ac5f7" +uuid = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71" +version = "0.62.1" + +[[GR_jll]] +deps = ["Artifacts", "Bzip2_jll", "Cairo_jll", "FFMPEG_jll", "Fontconfig_jll", "GLFW_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pixman_jll", "Pkg", "Qt5Base_jll", "Zlib_jll", "libpng_jll"] +git-tree-sha1 = "fd75fa3a2080109a2c0ec9864a6e14c60cca3866" +uuid = "d2c73de3-f751-5644-a686-071e5b155ba9" +version = "0.62.0+0" + +[[GeometryBasics]] +deps = ["EarCut_jll", "IterTools", "LinearAlgebra", "StaticArrays", "StructArrays", "Tables"] +git-tree-sha1 = "58bcdf5ebc057b085e58d95c138725628dd7453c" +uuid = "5c1252a2-5f33-56bf-86c9-59e7332b4326" +version = "0.4.1" + +[[GeometryTypes]] +deps = ["ColorTypes", "FixedPointNumbers", "LinearAlgebra", "StaticArrays"] +git-tree-sha1 = "07194161fe4e181c6bf51ef2e329ec4e7d050fc4" +uuid = "4d00f742-c7ba-57c2-abde-4428a4b178cb" +version = "0.8.4" + +[[Gettext_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "9b02998aba7bf074d14de89f9d37ca24a1a0b046" +uuid = "78b55507-aeef-58d4-861c-77aaff3498b1" +version = "0.21.0+0" + +[[Glib_jll]] +deps = ["Artifacts", "Gettext_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Libiconv_jll", "Libmount_jll", "PCRE_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "7bf67e9a481712b3dbe9cb3dac852dc4b1162e02" +uuid = "7746bdde-850d-59dc-9ae8-88ece973131d" +version = "2.68.3+0" + +[[GraphRecipes]] +deps = ["AbstractTrees", "GeometryTypes", "Graphs", "InteractiveUtils", "Interpolations", "LinearAlgebra", "NaNMath", "NetworkLayout", "PlotUtils", "RecipesBase", "SparseArrays", "Statistics"] +git-tree-sha1 = "8bddeec95f8c824d246d53de0230e225027bd3d9" +uuid = "bd48cda9-67a9-57be-86fa-5b3c104eda73" +version = "0.5.8" + +[[Graphite2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "344bf40dcab1073aca04aa0df4fb092f920e4011" +uuid = "3b182d85-2403-5c21-9c21-1e1f0cc25472" 
+version = "1.3.14+0" + +[[Graphs]] +deps = ["ArnoldiMethod", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] +git-tree-sha1 = "92243c07e786ea3458532e199eb3feee0e7e08eb" +uuid = "86223c79-3864-5bf0-83f7-82e725a168b6" +version = "1.4.1" + +[[Grisu]] +git-tree-sha1 = "53bb909d1151e57e2484c3d1b53e19552b887fb2" +uuid = "42e2da0e-8278-4e71-bc24-59509adca0fe" +version = "1.0.2" + +[[HTTP]] +deps = ["Base64", "Dates", "IniFile", "Logging", "MbedTLS", "NetworkOptions", "Sockets", "URIs"] +git-tree-sha1 = "14eece7a3308b4d8be910e265c724a6ba51a9798" +uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" +version = "0.9.16" + +[[HarfBuzz_jll]] +deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "FreeType2_jll", "Glib_jll", "Graphite2_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg"] +git-tree-sha1 = "8a954fed8ac097d5be04921d595f741115c1b2ad" +uuid = "2e76f6c2-a576-52d4-95c1-20adfe4de566" +version = "2.8.1+0" + +[[IRTools]] +deps = ["InteractiveUtils", "MacroTools", "Test"] +git-tree-sha1 = "95215cd0076a150ef46ff7928892bc341864c73c" +uuid = "7869d1d1-7146-5819-86e3-90919afe41df" +version = "0.4.3" + +[[Inflate]] +git-tree-sha1 = "f5fc07d4e706b84f72d54eedcc1c13d92fb0871c" +uuid = "d25df0c9-e2be-5dd7-82c8-3ad0b3e990b9" +version = "0.1.2" + +[[IniFile]] +deps = ["Test"] +git-tree-sha1 = "098e4d2c533924c921f9f9847274f2ad89e018b8" +uuid = "83e8ac13-25f8-5344-8a64-a9f2b223428f" +version = "0.5.0" + +[[InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[Interpolations]] +deps = ["AxisAlgorithms", "ChainRulesCore", "LinearAlgebra", "OffsetArrays", "Random", "Ratios", "Requires", "SharedArrays", "SparseArrays", "StaticArrays", "WoodburyMatrices"] +git-tree-sha1 = "61aa005707ea2cebf47c8d780da8dc9bc4e0c512" +uuid = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59" +version = "0.13.4" + +[[InverseFunctions]] +deps = ["Test"] +git-tree-sha1 = "a7254c0acd8e62f1ac75ad24d5db43f5f19f3c65" 
+uuid = "3587e190-3f89-42d0-90ee-14403ec27112" +version = "0.1.2" + +[[IrrationalConstants]] +git-tree-sha1 = "7fd44fd4ff43fc60815f8e764c0f352b83c49151" +uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" +version = "0.1.1" + +[[IterTools]] +git-tree-sha1 = "05110a2ab1fc5f932622ffea2a003221f4782c18" +uuid = "c8e1da08-722c-5040-9ed9-7db0dc04731e" +version = "1.3.0" + +[[IteratorInterfaceExtensions]] +git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "1.0.0" + +[[JLLWrappers]] +deps = ["Preferences"] +git-tree-sha1 = "642a199af8b68253517b80bd3bfd17eb4e84df6e" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.3.0" + +[[JSON]] +deps = ["Dates", "Mmap", "Parsers", "Unicode"] +git-tree-sha1 = "8076680b162ada2a031f707ac7b4953e30667a37" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.21.2" + +[[JpegTurbo_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "d735490ac75c5cb9f1b00d8b5509c11984dc6943" +uuid = "aacddb02-875f-59d6-b918-886e6ef4fbf8" +version = "2.1.0+0" + +[[LAME_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "f6250b16881adf048549549fba48b1161acdac8c" +uuid = "c1c5ebd0-6772-5130-a774-d5fcae4a789d" +version = "3.100.1+0" + +[[LZO_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "e5b909bcf985c5e2605737d2ce278ed791b89be6" +uuid = "dd4b983a-f0e5-5f8d-a1b7-129d4a5fb1ac" +version = "2.10.1+0" + +[[LaTeXStrings]] +git-tree-sha1 = "f2355693d6778a178ade15952b7ac47a4ff97996" +uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" +version = "1.3.0" + +[[Latexify]] +deps = ["Formatting", "InteractiveUtils", "LaTeXStrings", "MacroTools", "Markdown", "Printf", "Requires"] +git-tree-sha1 = "a8f4f279b6fa3c3c4f1adadd78a621b13a506bce" +uuid = "23fbe1c1-3f47-55db-b15f-69d7ec21a316" +version = "0.15.9" + +[[LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" + +[[LibCURL_jll]] 
+deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" + +[[LibGit2]] +deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" + +[[Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[Libffi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "0b4a5d71f3e5200a7dff793393e09dfc2d874290" +uuid = "e9f186c6-92d2-5b65-8a66-fee21dc1b490" +version = "3.2.2+1" + +[[Libgcrypt_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgpg_error_jll", "Pkg"] +git-tree-sha1 = "64613c82a59c120435c067c2b809fc61cf5166ae" +uuid = "d4300ac3-e22c-5743-9152-c294e39db1e4" +version = "1.8.7+0" + +[[Libglvnd_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll", "Xorg_libXext_jll"] +git-tree-sha1 = "7739f837d6447403596a75d19ed01fd08d6f56bf" +uuid = "7e76a0d4-f3c7-5321-8279-8d96eeed0f29" +version = "1.3.0+3" + +[[Libgpg_error_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "c333716e46366857753e273ce6a69ee0945a6db9" +uuid = "7add5ba3-2f88-524e-9cd5-f83b8a55f7b8" +version = "1.42.0+0" + +[[Libiconv_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "42b62845d70a619f063a7da093d995ec8e15e778" +uuid = "94ce4f54-9a6c-5748-9c1c-f9c7231a4531" +version = "1.16.1+1" + +[[Libmount_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "9c30530bf0effd46e15e0fdcf2b8636e78cbbd73" +uuid = "4b2f31a3-9ecc-558c-b454-b3730dcb73e9" +version = "2.35.0+0" + +[[Libtiff_jll]] +deps = ["Artifacts", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Pkg", "Zlib_jll", "Zstd_jll"] +git-tree-sha1 = "340e257aada13f95f98ee352d316c3bed37c8ab9" +uuid = "89763e89-9b03-5906-acba-b20f662cd828" +version = "4.3.0+0" + +[[Libuuid_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", 
"Pkg"] +git-tree-sha1 = "7f3efec06033682db852f8b3bc3c1d2b0a0ab066" +uuid = "38a345b3-de98-5d2b-a5d3-14cd9215e700" +version = "2.36.0+0" + +[[LightGraphs]] +deps = ["ArnoldiMethod", "DataStructures", "Distributed", "Inflate", "LinearAlgebra", "Random", "SharedArrays", "SimpleTraits", "SparseArrays", "Statistics"] +git-tree-sha1 = "432428df5f360964040ed60418dd5601ecd240b6" +uuid = "093fc24a-ae57-5d10-9952-331d41423f4d" +version = "1.3.5" + +[[LinearAlgebra]] +deps = ["Libdl"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + + +[[LittleCMS_jll]] +deps = ["Artifacts", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "Pkg"] +git-tree-sha1 = "110897e7db2d6836be22c18bffd9422218ee6284" +uuid = "d3a379c0-f9a3-5b72-a4c0-6bf4d2e8af0f" +version = "2.12.0+0" + +[[LogExpFunctions]] +deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"] +git-tree-sha1 = "be9eef9f9d78cecb6f262f3c10da151a6c5ab827" +uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" +version = "0.3.5" + +[[Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "3d3e902b31198a27340d0bf00d6ac452866021cf" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.9" + +[[Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[MbedTLS]] +deps = ["Dates", "MbedTLS_jll", "Random", "Sockets"] +git-tree-sha1 = "1c38e51c3d08ef2278062ebceade0e46cefc96fe" +uuid = "739be429-bea8-5141-9913-cc70e7f3736d" +version = "1.0.3" + +[[MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" + +[[Measures]] +git-tree-sha1 = "e498ddeee6f9fdb4551ce855a46f54dbd900245f" +uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e" +version = "0.3.1" + +[[Missings]] +deps = ["DataAPI"] +git-tree-sha1 = "bf210ce90b6c9eed32d25dbcae1ebc565df2687f" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "1.0.2" + +[[Mmap]] +uuid = 
"a63ad114-7e13-5084-954f-fe012c677804" + +[[MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" + +[[NaNMath]] +git-tree-sha1 = "bfe47e760d60b82b66b61d2d44128b62e3a369fb" +uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" +version = "0.3.5" + +[[NetworkLayout]] +deps = ["GeometryBasics", "LinearAlgebra", "Random", "Requires", "SparseArrays"] +git-tree-sha1 = "24e10982e84dd35cd867102243454bf8a4581a76" +uuid = "46757867-2c16-5918-afeb-47bfcb05e46a" +version = "0.4.3" + +[[NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" + +[[OffsetArrays]] +deps = ["Adapt"] +git-tree-sha1 = "043017e0bdeff61cfbb7afeb558ab29536bbb5ed" +uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +version = "1.10.8" + +[[Ogg_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "7937eda4681660b4d6aeeecc2f7e1c81c8ee4e2f" +uuid = "e7412a2a-1a6e-54c0-be00-318e2571c051" +version = "1.3.5+0" + + +[[OpenJpeg_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libtiff_jll", "LittleCMS_jll", "Pkg", "libpng_jll"] +git-tree-sha1 = "76374b6e7f632c130e78100b166e5a48464256f8" +uuid = "643b3616-a352-519d-856d-80112ee9badc" +version = "2.4.0+0" + +[[OpenLibm_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "05823500-19ac-5b8b-9628-191a04bc5112" + +[[OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "15003dcb7d8db3c6c857fda14891a539a8f2705a" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "1.1.10+0" + +[[OpenSpecFun_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" +uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" +version = "0.5.5+0" + +[[Opus_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "51a08fb14ec28da2ec7a927c4337e4332c2a4720" +uuid = "91d4177d-7536-5919-b921-800302f37372" +version = "1.3.2+0" + +[[OrderedCollections]] +git-tree-sha1 = "85f8e6578bf1f9ee0d11e7bb1b1456435479d47c" +uuid = 
"bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.4.1" + +[[PCRE_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "b2a7af664e098055a7529ad1a900ded962bca488" +uuid = "2f80f16e-611a-54ab-bc61-aa92de5b98fc" +version = "8.44.0+0" + +[[Parsers]] +deps = ["Dates"] +git-tree-sha1 = "ae4bbcadb2906ccc085cf52ac286dc1377dceccc" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "2.1.2" + +[[Pixman_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "b4f5d02549a10e20780a24fce72bea96b6329e29" +uuid = "30392449-352a-5448-841d-b1acce4e97dc" +version = "0.40.1+0" + +[[Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + +[[PlotThemes]] +deps = ["PlotUtils", "Requires", "Statistics"] +git-tree-sha1 = "a3a964ce9dc7898193536002a6dd892b1b5a6f1d" +uuid = "ccf2f8ad-2431-5c83-bf29-c5338b663b6a" +version = "2.0.1" + +[[PlotUtils]] +deps = ["ColorSchemes", "Colors", "Dates", "Printf", "Random", "Reexport", "Statistics"] +git-tree-sha1 = "b084324b4af5a438cd63619fd006614b3b20b87b" +uuid = "995b91a9-d308-5afd-9ec6-746e21dbc043" +version = "1.0.15" + +[[Plots]] +deps = ["Base64", "Contour", "Dates", "Downloads", "FFMPEG", "FixedPointNumbers", "GR", "GeometryBasics", "JSON", "Latexify", "LinearAlgebra", "Measures", "NaNMath", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "RecipesPipeline", "Reexport", "Requires", "Scratch", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs", "UnicodeFun"] +git-tree-sha1 = "0d185e8c33401084cab546a756b387b15f76720c" +uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +version = "1.23.6" + + +[[Poppler_jll]] +deps = ["Artifacts", "Cairo_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "JpegTurbo_jll", "Libdl", "Libtiff_jll", "OpenJpeg_jll", "Pkg", "libpng_jll"] +git-tree-sha1 = 
"e11443687ac151ac6ef6699eb75f964bed8e1faa" +uuid = "9c32591e-4766-534b-9725-b71a8799265b" +version = "0.87.0+2" + +[[Preferences]] +deps = ["TOML"] +git-tree-sha1 = "00cfd92944ca9c760982747e9a1d0d5d86ab1e5a" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.2.2" + +[[Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[Qt5Base_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Fontconfig_jll", "Glib_jll", "JLLWrappers", "Libdl", "Libglvnd_jll", "OpenSSL_jll", "Pkg", "Xorg_libXext_jll", "Xorg_libxcb_jll", "Xorg_xcb_util_image_jll", "Xorg_xcb_util_keysyms_jll", "Xorg_xcb_util_renderutil_jll", "Xorg_xcb_util_wm_jll", "Zlib_jll", "xkbcommon_jll"] +git-tree-sha1 = "ad368663a5e20dbb8d6dc2fddeefe4dae0781ae8" +uuid = "ea2cea3b-5b76-57ae-a6ef-0a8af62496e1" +version = "5.15.3+0" + +[[REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[Random]] +deps = ["Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[Ratios]] +deps = ["Requires"] +git-tree-sha1 = "01d341f502250e81f6fec0afe662aa861392a3aa" +uuid = "c84ed2f1-dad5-54f0-aa8e-dbefe2724439" +version = "0.4.2" + +[[RealDot]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "9f0a1b71baaf7650f4fa8a1d168c7fb6ee41f0c9" +uuid = "c1ae055f-0cd5-4b69-90a6-9a35b1a98df9" +version = "0.1.0" + +[[RecipesBase]] +git-tree-sha1 = "44a75aa7a527910ee3d1751d1f0e4148698add9e" +uuid = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" +version = "1.1.2" + +[[RecipesPipeline]] +deps = ["Dates", "NaNMath", "PlotUtils", "RecipesBase"] +git-tree-sha1 = "7ad0dfa8d03b7bcf8c597f59f5292801730c55b8" +uuid = "01d81517-befc-4cb6-b9ec-a95719d0359c" +version = "0.4.1" + +[[Reexport]] +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.2.2" + +[[Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "4036a3bd08ac7e968e27c203d45f5fff15020621" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" 
+version = "1.1.3" + + +[[Richardson]] +deps = ["LinearAlgebra"] +git-tree-sha1 = "e03ca566bec93f8a3aeb059c8ef102f268a38949" +uuid = "708f8203-808e-40c0-ba2d-98a6953ed40d" +version = "1.4.0" + +[[SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[Scratch]] +deps = ["Dates"] +git-tree-sha1 = "0b4b7f1393cff97c33891da2a0bf69c6ed241fda" +uuid = "6c6a2e73-6563-6170-7368-637461726353" +version = "1.1.0" + +[[Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[Showoff]] +deps = ["Dates", "Grisu"] +git-tree-sha1 = "91eddf657aca81df9ae6ceb20b959ae5653ad1de" +uuid = "992d4aef-0814-514b-bc4d-f2e9a6c4116f" +version = "1.0.3" + +[[SimpleTraits]] +deps = ["InteractiveUtils", "MacroTools"] +git-tree-sha1 = "5d7e3f4e11935503d3ecaf7186eac40602e7d231" +uuid = "699a6c99-e7fa-54fc-8d76-47d257e15c1d" +version = "0.9.4" + +[[Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[SortingAlgorithms]] +deps = ["DataStructures"] +git-tree-sha1 = "b3363d7460f7d098ca0912c69b082f75625d7508" +uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" +version = "1.0.1" + +[[SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[SpecialFunctions]] +deps = ["ChainRulesCore", "IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] +git-tree-sha1 = "f0bccf98e16759818ffc5d97ac3ebf87eb950150" +uuid = "276daf66-3868-5448-9aa4-cd146d93841b" +version = "1.8.1" + +[[StaticArrays]] +deps = ["LinearAlgebra", "Random", "Statistics"] +git-tree-sha1 = "3c76dde64d03699e074ac02eb2e8ba8254d428da" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.2.13" + +[[Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[StatsAPI]] +git-tree-sha1 = "1958272568dc176a1d881acb797beb909c785510" +uuid = "82ae8749-77ed-4fe6-ae5f-f523153014b0" +version = 
"1.0.0" + +[[StatsBase]] +deps = ["DataAPI", "DataStructures", "LinearAlgebra", "LogExpFunctions", "Missings", "Printf", "Random", "SortingAlgorithms", "SparseArrays", "Statistics", "StatsAPI"] +git-tree-sha1 = "eb35dcc66558b2dda84079b9a1be17557d32091a" +uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +version = "0.33.12" + +[[StructArrays]] +deps = ["Adapt", "DataAPI", "StaticArrays", "Tables"] +git-tree-sha1 = "2ce41e0d042c60ecd131e9fb7154a3bfadbf50d3" +uuid = "09ab397b-f2b6-538f-b94a-2f83cf4a842a" +version = "0.6.3" + +[[TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" + +[[TableTraits]] +deps = ["IteratorInterfaceExtensions"] +git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "1.0.1" + +[[Tables]] +deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "TableTraits", "Test"] +git-tree-sha1 = "fed34d0e71b91734bf0a7e10eb1bb05296ddbcd0" +uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" +version = "1.6.0" + +[[Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" + + +[[Tectonic]] +deps = ["Pkg"] +git-tree-sha1 = "acf12eccb390a78653ee805cd527898f01f78a85" +uuid = "9ac5f52a-99c6-489f-af81-462ef484790f" +version = "0.6.1" + +[[Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + + +[[TikzGraphs]] +deps = ["LaTeXStrings", "LightGraphs", "TikzPictures"] +git-tree-sha1 = "48932ba660bc8cefc0aa9519ba79d63082aea892" +uuid = "b4f28e30-c73f-5eaf-a395-8a9db949a742" +version = "1.2.0" + +[[TikzPictures]] +deps = ["LaTeXStrings", "Poppler_jll", "Requires", "Tectonic"] +git-tree-sha1 = "a08671c0979063a437378f6410bb75a465f3cd1c" +uuid = "37f6aa50-8035-52d0-81c2-5a1d08754b2d" +version = "3.4.1" + +[[URIs]] +git-tree-sha1 = "97bbe755a53fe859669cd907f2d96aee8d2c1355" +uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" +version = "1.3.0" + +[[UUIDs]] +deps = ["Random", "SHA"] 
+uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[UnicodeFun]] +deps = ["REPL"] +git-tree-sha1 = "53915e50200959667e78a92a418594b428dffddf" +uuid = "1cfade01-22cf-5700-b092-accc4b62d6e1" +version = "0.4.1" + +[[Wayland_jll]] +deps = ["Artifacts", "Expat_jll", "JLLWrappers", "Libdl", "Libffi_jll", "Pkg", "XML2_jll"] +git-tree-sha1 = "3e61f0b86f90dacb0bc0e73a0c5a83f6a8636e23" +uuid = "a2964d1f-97da-50d4-b82a-358c7fce9d89" +version = "1.19.0+0" + +[[Wayland_protocols_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Wayland_jll"] +git-tree-sha1 = "2839f1c1296940218e35df0bbb220f2a79686670" +uuid = "2381bf8a-dfd0-557d-9999-79630e7b1b91" +version = "1.18.0+4" + +[[WoodburyMatrices]] +deps = ["LinearAlgebra", "SparseArrays"] +git-tree-sha1 = "de67fa59e33ad156a590055375a30b23c40299d3" +uuid = "efce3f68-66dc-5838-9240-27a6d6f5f9b6" +version = "0.5.5" + +[[XML2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libiconv_jll", "Pkg", "Zlib_jll"] +git-tree-sha1 = "1acf5bdf07aa0907e0a37d3718bb88d4b687b74a" +uuid = "02c8fc9c-b97f-50b9-bbe4-9be30ff0a78a" +version = "2.9.12+0" + +[[XSLT_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Libgcrypt_jll", "Libgpg_error_jll", "Libiconv_jll", "Pkg", "XML2_jll", "Zlib_jll"] +git-tree-sha1 = "91844873c4085240b95e795f692c4cec4d805f8a" +uuid = "aed1982a-8fda-507f-9586-7b0439959a61" +version = "1.1.34+0" + +[[Xorg_libX11_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll", "Xorg_xtrans_jll"] +git-tree-sha1 = "5be649d550f3f4b95308bf0183b82e2582876527" +uuid = "4f6342f7-b3d2-589e-9d20-edeb45f2b2bc" +version = "1.6.9+4" + +[[Xorg_libXau_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "4e490d5c960c314f33885790ed410ff3a94ce67e" +uuid = "0c0b7dd1-d40b-584c-a123-a41640f87eec" +version = "1.0.9+4" + +[[Xorg_libXcursor_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXfixes_jll", "Xorg_libXrender_jll"] 
+git-tree-sha1 = "12e0eb3bc634fa2080c1c37fccf56f7c22989afd" +uuid = "935fb764-8cf2-53bf-bb30-45bb1f8bf724" +version = "1.2.0+4" + +[[Xorg_libXdmcp_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "4fe47bd2247248125c428978740e18a681372dd4" +uuid = "a3789734-cfe1-5b06-b2d0-1dd0d9d62d05" +version = "1.1.3+4" + +[[Xorg_libXext_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "b7c0aa8c376b31e4852b360222848637f481f8c3" +uuid = "1082639a-0dae-5f34-9b06-72781eeb8cb3" +version = "1.3.4+4" + +[[Xorg_libXfixes_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "0e0dc7431e7a0587559f9294aeec269471c991a4" +uuid = "d091e8ba-531a-589c-9de9-94069b037ed8" +version = "5.0.3+4" + +[[Xorg_libXi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXfixes_jll"] +git-tree-sha1 = "89b52bc2160aadc84d707093930ef0bffa641246" +uuid = "a51aa0fd-4e3c-5386-b890-e753decda492" +version = "1.7.10+4" + +[[Xorg_libXinerama_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll"] +git-tree-sha1 = "26be8b1c342929259317d8b9f7b53bf2bb73b123" +uuid = "d1454406-59df-5ea1-beac-c340f2130bc3" +version = "1.1.4+4" + +[[Xorg_libXrandr_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libXext_jll", "Xorg_libXrender_jll"] +git-tree-sha1 = "34cea83cb726fb58f325887bf0612c6b3fb17631" +uuid = "ec84b674-ba8e-5d96-8ba1-2a689ba10484" +version = "1.5.2+4" + +[[Xorg_libXrender_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "19560f30fd49f4d4efbe7002a1037f8c43d43b96" +uuid = "ea2f1a96-1ddc-540d-b46f-429655e07cfa" +version = "0.9.10+4" + +[[Xorg_libpthread_stubs_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "6783737e45d3c59a4a4c4091f5f88cdcf0908cbb" +uuid = "14d82f49-176c-5ed1-bb49-ad3f5cbd8c74" +version = "0.1.0+3" + +[[Xorg_libxcb_jll]] +deps = ["Artifacts", 
"JLLWrappers", "Libdl", "Pkg", "XSLT_jll", "Xorg_libXau_jll", "Xorg_libXdmcp_jll", "Xorg_libpthread_stubs_jll"] +git-tree-sha1 = "daf17f441228e7a3833846cd048892861cff16d6" +uuid = "c7cfdc94-dc32-55de-ac96-5a1b8d977c5b" +version = "1.13.0+3" + +[[Xorg_libxkbfile_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libX11_jll"] +git-tree-sha1 = "926af861744212db0eb001d9e40b5d16292080b2" +uuid = "cc61e674-0454-545c-8b26-ed2c68acab7a" +version = "1.1.0+4" + +[[Xorg_xcb_util_image_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "0fab0a40349ba1cba2c1da699243396ff8e94b97" +uuid = "12413925-8142-5f55-bb0e-6d7ca50bb09b" +version = "0.4.0+1" + +[[Xorg_xcb_util_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxcb_jll"] +git-tree-sha1 = "e7fd7b2881fa2eaa72717420894d3938177862d1" +uuid = "2def613f-5ad1-5310-b15b-b15d46f528f5" +version = "0.4.0+1" + +[[Xorg_xcb_util_keysyms_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "d1151e2c45a544f32441a567d1690e701ec89b00" +uuid = "975044d2-76e6-5fbe-bf08-97ce7c6574c7" +version = "0.4.0+1" + +[[Xorg_xcb_util_renderutil_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "dfd7a8f38d4613b6a575253b3174dd991ca6183e" +uuid = "0d47668e-0667-5a69-a72c-f761630bfb7e" +version = "0.3.9+1" + +[[Xorg_xcb_util_wm_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xcb_util_jll"] +git-tree-sha1 = "e78d10aab01a4a154142c5006ed44fd9e8e31b67" +uuid = "c22f9ab0-d5fe-5066-847c-f4bb1cd4e361" +version = "0.4.1+1" + +[[Xorg_xkbcomp_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_libxkbfile_jll"] +git-tree-sha1 = "4bcbf660f6c2e714f87e960a171b119d06ee163b" +uuid = "35661453-b289-5fab-8a00-3d9160c6a3a4" +version = "1.4.2+4" + +[[Xorg_xkeyboard_config_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Xorg_xkbcomp_jll"] +git-tree-sha1 = 
"5c8424f8a67c3f2209646d4425f3d415fee5931d" +uuid = "33bec58e-1273-512f-9401-5d533626f822" +version = "2.27.0+4" + +[[Xorg_xtrans_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "79c31e7844f6ecf779705fbc12146eb190b7d845" +uuid = "c5fb5394-a638-5e4d-96e5-b29de1b5cf10" +version = "1.4.0+3" + +[[Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" + +[[Zstd_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "cc4bf3fdde8b7e3e9fa0351bdeedba1cf3b7f6e6" +uuid = "3161d3a3-bdf6-5164-811a-617609db77b4" +version = "1.5.0+0" + +[[Zygote]] +deps = ["AbstractFFTs", "ChainRules", "ChainRulesCore", "DiffRules", "Distributed", "FillArrays", "ForwardDiff", "IRTools", "InteractiveUtils", "LinearAlgebra", "MacroTools", "NaNMath", "Random", "Requires", "SpecialFunctions", "Statistics", "ZygoteRules"] +git-tree-sha1 = "2c30f2df0ba43c17e88c8b55b5b22c401f7cde4e" +uuid = "e88e6eb3-aa80-5325-afca-941959d7151f" +version = "0.6.30" + +[[ZygoteRules]] +deps = ["MacroTools"] +git-tree-sha1 = "8c1a8e4dfacb1fd631745552c8db35d0deb09ea0" +uuid = "700de1a5-db45-46bc-99cf-38207098b444" +version = "0.2.2" + +[[libass_jll]] +deps = ["Artifacts", "Bzip2_jll", "FreeType2_jll", "FriBidi_jll", "HarfBuzz_jll", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "5982a94fcba20f02f42ace44b9894ee2b140fe47" +uuid = "0ac62f75-1d6f-5e53-bd7c-93b484bb37c0" +version = "0.15.1+0" + +[[libfdk_aac_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "daacc84a041563f965be61859a36e17c4e4fcd55" +uuid = "f638f0a6-7fb0-5443-88ba-1cc74229b280" +version = "2.0.2+0" + +[[libpng_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Zlib_jll"] +git-tree-sha1 = "94d180a6d2b5e55e447e2d27a29ed04fe79eb30c" +uuid = "b53b4c65-9356-5827-b1ea-8c7a1a84506f" +version = "1.6.38+0" + +[[libvorbis_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Ogg_jll", "Pkg"] +git-tree-sha1 = "c45f4e40e7aafe9d086379e5578947ec8b95a8fb" +uuid 
= "f27f6e37-5d2b-51aa-960f-b287f2bc3b7a" +version = "1.3.7+0" + +[[nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" + +[[p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" + +[[x264_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "4fea590b89e6ec504593146bf8b988b2c00922b2" +uuid = "1270edf5-f2f9-52d2-97e9-ab00b5d0237a" +version = "2021.5.5+0" + +[[x265_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "ee567a171cce03570d77ad3a43e90218e38937a9" +uuid = "dfaa095f-4041-5dcd-9319-2fabd8486b76" +version = "3.5.0+0" + +[[xkbcommon_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg", "Wayland_jll", "Wayland_protocols_jll", "Xorg_libxcb_jll", "Xorg_xkeyboard_config_jll"] +git-tree-sha1 = "ece2350174195bb31de1a63bea3a41ae1aa593b6" +uuid = "d8fb68d0-12a3-5cfd-a85a-d49703b185fd" +version = "0.9.1+5" diff --git a/docs_vitepress/src/lectures/lecture_08/Project.toml b/docs_vitepress/src/lectures/lecture_08/Project.toml new file mode 100644 index 00000000..b63eeeab --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/Project.toml @@ -0,0 +1,9 @@ +[deps] +ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" +FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" +GraphRecipes = "bd48cda9-67a9-57be-86fa-5b3c104eda73" +LightGraphs = "093fc24a-ae57-5d10-9952-331d41423f4d" +Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +TikzGraphs = "b4f28e30-c73f-5eaf-a395-8a9db949a742" +TikzPictures = "37f6aa50-8035-52d0-81c2-5a1d08754b2d" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" diff --git a/docs_vitepress/src/lectures/lecture_08/ScalarReverseDiff.jl b/docs_vitepress/src/lectures/lecture_08/ScalarReverseDiff.jl new file mode 100644 index 00000000..0081d4ed --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/ScalarReverseDiff.jl @@ -0,0 +1,96 @@ +mutable struct TrackedReal{T<:Real} + data::T + grad::Union{Nothing,T} + children::Dict + # 
this field is only need for printing the graph. you can safely remove it. + name::String +end + +track(x::Real,name="") = TrackedReal(x,nothing,Dict(),name) + +function Base.show(io::IO, x::TrackedReal) + t = isempty(x.name) ? "(tracked)" : "(tracked $(x.name))" + print(io, "$(x.data) $t") +end + +function accum!(x::TrackedReal) + if isnothing(x.grad) + x.grad = sum(accum!(v)*w for (v,w) in x.children) + end + x.grad +end + +function gradient(f, args::Real...) + ts = track.(args) + y = f(ts...) + y.grad = 1.0 + accum!.(ts) +end + + +########## RULES ############################################################# + +function Base.:*(a::TrackedReal, b::TrackedReal) + z = track(a.data * b.data, "*") + a.children[z] = b.data # dz/da=b + b.children[z] = a.data # dz/db=a + z +end +function Base.:+(a::TrackedReal{T}, b::TrackedReal{T}) where T + z = track(a.data + b.data, "+") + a.children[z] = one(T) + b.children[z] = one(T) + z +end +function Base.sin(x::TrackedReal) + z = track(sin(x.data), "sin") + x.children[z] = cos(x.data) + z +end + + +########## Optimizion 2D function ############################################ + +using Plots +g(x,y) = y*y + sin(x) +cscheme = cgrad(:RdYlBu_5, rev=true) +p1 = contour(-4:0.1:4, -2:0.1:2, g, fill=true, c=cscheme, xlabel="x", ylabel="y") +display(p1) + + +function descend(f::Function, λ::Real, args::Real...) + Δargs = gradient(f, args...) + args .- λ .* Δargs +end + +function minimize(f::Function, args::T...; niters=20, λ=0.01) where T<:Real + paths = ntuple(_->Vector{T}(undef,niters), length(args)) + for i in 1:niters + args = descend(f, λ, args...) + @info f(args...) 
+ for j in 1:length(args) + paths[j][i] = args[j] + end + end + paths +end + +xs1, ys1 = minimize(g, 1.5, -2.4, λ=0.2, niters=34) +xs2, ys2 = minimize(g, 1.8, -2.4, λ=0.2, niters=16) + +scatter!(p1, [xs1[1]], [ys1[1]], markercolor=:black, marker=:star, ms=7, label="Minimum") +scatter!(p1, [xs2[1]], [ys2[1]], markercolor=:black, marker=:star, ms=7, label=false) +scatter!(p1, [-π/2], [0], markercolor=:red, marker=:star, ms=7, label="Initial Point") +scatter!(p1, xs1[1:1], ys1[1:1], markercolor=:black, label="GD Path", xlims=(-4,4), ylims=(-2,2)) + +anim = @animate for i in 1:max(length(xs1), length(xs2)) + if i <= length(xs1) + scatter!(p1, xs1[1:i], ys1[1:i], mc=:black, lw=3, xlims=(-4,4), ylims=(-2,2), label=false) + end + if i <= length(xs2) + scatter!(p1, xs2[1:i], ys2[1:i], mc=:black, lw=3, label=false) + end + p1 +end + +gif(anim, "gd-path.gif", fps=15) diff --git a/docs_vitepress/src/lectures/lecture_08/anim.gif b/docs_vitepress/src/lectures/lecture_08/anim.gif new file mode 100644 index 00000000..fe8e6be6 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_08/anim.gif differ diff --git a/docs_vitepress/src/lectures/lecture_08/ffnn.jl b/docs_vitepress/src/lectures/lecture_08/ffnn.jl new file mode 100644 index 00000000..2ab40bcb --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/ffnn.jl @@ -0,0 +1,204 @@ +using GLMakie +function flower(n; npetals = 8) + n = div(n, npetals) + x = mapreduce(hcat, (1:npetals) .* (2π/npetals)) do θ + ct = cos(θ) + st = sin(θ) + + x0 = tanh.(randn(1, n) .- 1) .+ 4.0 .+ 0.05.* randn(1, n) + y0 = randn(1, n) .* 0.3 + + x₁ = x0 * cos(θ) .- y0 * sin(θ) + x₂ = x0 * sin(θ) .+ y0 * cos(θ) + vcat(x₁, x₂) + end + _y = mapreduce(i -> fill(i, n), vcat, 1:npetals) + y = zeros(npetals, length(_y)) + foreach(i -> y[_y[i], i] = 1, 1:length(_y)) + Float32.(x), Float32.(y) +end + +x, y = flower(900) +scatter(x[1,:], x[2,:], color = mapslices(argmax, y, dims = 1)[:]) + +####### +# Define a Tracked Array for operator overloading 
AD +####### +struct TrackedArray{T,N,V<:AbstractArray{T,N}} <: AbstractArray{T,N} + value::V + grad::Union{Nothing,V} + tape::Vector{Any} +end + +TrackedArray(a::AbstractArray) = TrackedArray(a, similar(a) .= 0, []) +TrackedMatrix{T,V} = TrackedArray{T,2,V} where {T,V<:AbstractMatrix{T}} +TrackedVector{T,V} = TrackedArray{T,1,V} where {T,V<:AbstractVector{T}} +Base.size(a::TrackedArray) = size(a.value) +Base.show(io::IO, ::MIME"text/plain", a::TrackedArray) = show(io, a) +Base.show(io::IO, a::TrackedArray) = print(io, "TrackedArray($(size(a.value)))") +value(A::TrackedArray) = A.value +resetgrad!(A::TrackedArray) = (A.grad .= 0; empty!(A.tape)) +value(A) = A +track(A) = TrackedArray(A) +track(a::Number) = TrackedArray(reshape([a], 1, 1)) + +function accum!(A::TrackedArray) + isempty(A.tape) && return(A.grad) + A.grad .= sum(g(accum!(r)) for (r, g) in A.tape) + empty!(A.tape) + A.grad +end + +####### +# Define AD rules for few operations appearing in FFNN +####### +import Base: +, * +import Base.Broadcast: broadcasted +function *(A::TrackedMatrix, B::TrackedMatrix) + a, b = value.((A, B)) + C = track(a * b) + push!(A.tape, (C, Δ -> Δ * b')) + push!(B.tape, (C, Δ -> a' * Δ)) + C +end + +function *(A::TrackedMatrix, B::AbstractMatrix) + a, b = value.((A, B)) + C = track(a * b) + push!(A.tape, (C, Δ -> Δ * b')) + C +end + +function broadcasted(::typeof(+), A::TrackedMatrix, B::TrackedVector) + C = track(value(A) .+ value(B)) + push!(A.tape, (C, Δ -> Δ)) + push!(B.tape, (C, Δ -> sum(Δ, dims = 2)[:])) + C +end + +function σ(x::Real) + t = @fastmath exp(-abs(x)) + y = ifelse(x ≥ 0, inv(1 + t), t / (1 + t)) + ifelse(x > 40, one(y), ifelse(x < -80, zero(y), y)) +end + +broadcasted(::typeof(identity), A::TrackedArray) = A + +function broadcasted(::typeof(σ), A::TrackedArray) + Ω = σ.(value(A)) + C = track(Ω) + push!(A.tape, (C, Δ -> Δ .* Ω .* (1 .- Ω))) + C +end + +function mse(A::TrackedMatrix, B::AbstractMatrix) + n = size(A, 2) + a = value(A) + c = similar(a, 1, 1) + c .= 
sum((a .- B).^2)/2n + C = track(c) + push!(A.tape, (C, Δ -> Δ .* (a .- B) ./ n)) + C +end + +mse(x::AbstractMatrix, y::AbstractMatrix) = sum((x - y).^2) / (2*size(x,2)) + +####### +# Define a Dense layer +####### +struct Dense{F,W,B} + σ::F + w::W + b::B +end + +Base.show(io::IO, m::Dense) = print(io, "Dense($(size(m.w,2)) → $(size(m.w,1)))") +Dense(i::Int, o::Int, σ = identity) = Dense(σ, randn(Float32, o, i), randn(Float32, o)) +track(m::Dense) = Dense(m.σ, track(m.w), track(m.b)) +track(m::ComposedFunction) = track(m.outer) ∘ track(m.inner) +(m::Dense)(x) = m.σ.(m.w * x .+ m.b) +params(m::ComposedFunction) = vcat(params(m.outer), params(m.inner)) +params(m::Dense) = [m.w, m.b] + +####### +# Let's try to actually train a model +####### +x, y = flower(900) +function initmodel() + m₁ = track(Dense(2, 20, σ)) + m₂ = track(Dense(20, 20, σ)) + m₃ = track(Dense(20, size(y,1))) + m = m₃ ∘ m₂ ∘ m₁ +end +m = initmodel() +m(x) |> value + +###### +# Let's try to learn the parameters +###### +α = 0.01 +ps = params(m) +@elapsed for i in 1:10000 + foreach(resetgrad!, ps) + loss = mse(m(x), y) + fill!(loss.grad, 1) + foreach(accum!, ps) + foreach(x -> x.value .-= α .* x.grad, ps) + mod(i,250) == 0 && println("loss after $(i) iterations = ", sum(value(loss))) +end + +all(mapslices(argmax, value(m(x)), dims = 1)[:] .== mapslices(argmax, y, dims = 1)[:]) +scatter(x[1,:], x[2,:], color = mapslices(argmax, value(m(x)), dims = 1)[:]) + +###### +# Let's try to move the computation to GPU +###### +using CUDA +gpu(x::AbstractArray) = CuArray(x) +gpu(x::TrackedArray) = TrackedArray(CuArray(value(x))) +gpu(m::Dense) = Dense(m.σ, gpu(m.w), gpu(m.b)) +gpu(m::ComposedFunction) = gpu(m.outer) ∘ gpu(m.inner) + +gx, gy = gpu(x), gpu(y) +m = gpu(m) +ps = params(m) +@elapsed for i in 1:10000 + foreach(resetgrad!, ps) + loss = mse(m(gx), gy) + fill!(loss.grad, 1) + foreach(accum!, ps) + foreach(x -> x.value .-= α .* x.grad, ps) + mod(i,250) == 0 && println("loss after $(i) iterations = ", 
sum(value(loss))) +end + +####### +# Why we see a small speed-up? The problem is small +####### +using BenchmarkTools +p = randn(Float32, 20, 2) +@benchmark $(p) * $(x) +gp = gpu(p) +@benchmark $(gp) * $(gx) + + +###### +# Let's verify the gradients +###### +using FiniteDifferences +ps = [m₃.w, m₃.b, m₂.w, m₂.b, m₁.w, m₁.b] +map(ps) do p + foreach(resetgrad!, ps) + loss = mse(m(x), y) + fill!(loss.grad, 1) + foreach(accum!, ps) + accum!(p) + θ = deepcopy(value(p)) + Δθ = deepcopy(p.grad) + f = θ -> begin + p.value .= θ + value(mse(m(x), y)) + end + sum(abs2.(grad(central_fdm(5, 1), f, θ)[1] - Δθ)) +end + + diff --git a/docs_vitepress/src/lectures/lecture_08/gd-path.gif b/docs_vitepress/src/lectures/lecture_08/gd-path.gif new file mode 100644 index 00000000..216d00b5 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_08/gd-path.gif differ diff --git a/docs_vitepress/src/lectures/lecture_08/graph.png b/docs_vitepress/src/lectures/lecture_08/graph.png new file mode 100644 index 00000000..676c32a8 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_08/graph.png differ diff --git a/docs_vitepress/src/lectures/lecture_08/graphdiff.jl b/docs_vitepress/src/lectures/lecture_08/graphdiff.jl new file mode 100644 index 00000000..2a105dea --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/graphdiff.jl @@ -0,0 +1,69 @@ +using TikzGraphs +using TikzPictures +using GraphRecipes +using LightGraphs +using Plots + +default(size=(1000, 1000)) + +f = :(x * y + sin(x)) + +vertices = Dict([ +1 => (name = "x", x = 1, y = 0, color = :lightblue, ), +2 => (name = "y", x = 0, y = 1, color = :lightblue, ), +3 => (name = "h₁ = sin", x = 1, y = 2, color = :lightblue, ), +4 => (name = "h₂ = *", x = 0, y = 3, color = :lightblue, ), +5 => (name = "h₃ = +", x = 1, y = 4, color = :lightblue, ), +6 => (name = "z", x = 1, y = 5, color = :lightblue, ), +7 => (name = "∂z/∂h₃", x = 2.5, y = 4, color = :orange, ), +8 => (name = "∂h₃/∂h₂", x = 2.5, y = 3, color = :orange, 
), +9 => (name = "∂z/∂h₂", x = 3.5, y = 3, color = :orange, ), +10 => (name = "∂h₃/∂h₁", x = 2.5, y = 2, color = :orange, ), +11 => (name = "∂z/∂h₁", x = 3.5, y = 2, color = :orange, ), +12 => (name = "∂h₂/∂y", x = 4.5, y = 1, color = :orange, ), +13 => (name = "∂h₂/∂x + ∂h₁/∂x", x = 4.5, y = 0, color = :orange, ), +14 => (name = "∂z/∂x", x = 6, y = 0, color = :orange, ), +]) + +n = length(vertices) +g = LightGraphs.DiGraph(n) +add_edge!(g, 1, 3) # x -> sin +add_edge!(g, 1, 4) # x -> * +add_edge!(g, 2, 4) # y -> * +add_edge!(g, 3, 5) # sin(x) -> sin(x) + x*y +add_edge!(g, 4, 5) # x*y -> sin(x) + x*y +add_edge!(g, 5, 6) # sin(x) + x*y -> z + +#add_edge!(g, 5, 7,) # ∂z/∂h₃ -> z +#add_edge!(g, 4, 8,) # ∂h₃/∂h₂ -> h₂ = * +#add_edge!(g, 8, 9,) # ∂z/∂h₂ -> ∂h₃/∂h₂ +#add_edge!(g, 7, 9,) # ∂z/∂h₂ -> ∂h₃/∂h₂ +#add_edge!(g, 3, 10,) # ∂h₃/∂h₁ -> h₁ +#add_edge!(g, 10, 11,)# ∂z/∂h₁ -> ∂h₃/∂h₁ +#add_edge!(g, 7, 11,) # ∂z/∂h₁ -> ∂z/∂h₃ +#add_edge!(g, 2, 12,) # ∂h₂/∂y -> y +#add_edge!(g, 9, 12,) # ∂h₂/∂y -> y +#add_edge!(g, 1, 13,) # ∂h₂/∂x + ∂h₁/∂x -> y +#add_edge!(g, 9, 14,) # "∂z/∂x" -> ∂z/∂h₂ +#add_edge!(g, 11, 14,) # "∂z/∂x" -> ∂z/∂h₁ +#add_edge!(g, 13, 14,) # "∂z/∂x" -> ∂h₂/∂x + ∂h₁/∂x +# graphplot(adjacency_matrix(g), +# names = [vertices[i].name for i in 1:n], +# x = 0.25 .* [vertices[i].x for i in 1:n], +# y = 0.25 .* [vertices[i].y falseor i in 1:n], +# curves=false, +# markercolor = [vertices[i].color for i in 1:n], +# ) + +for n in [6, 7, 9, 11, 13, 14] + #graphplot(adjacency_matrix(g)[1:n, 1:n], + # names = [vertices[i].name for i in 1:n], + # x = 0.25 .* [vertices[i].x for i in 1:n], + # y = 0.25 .* [vertices[i].y for i in 1:n], + # curves=false, + # markercolor = [vertices[i].color for i in 1:n], + #) + names = [vertices[i].name for i in 1:length(vertices)] + t = TikzGraphs.plot(g, names) + TikzPictures.save(SVG("graphdiff_$(n).svg"),t) +end diff --git a/docs_vitepress/src/lectures/lecture_08/graphdiff_11.svg b/docs_vitepress/src/lectures/lecture_08/graphdiff_11.svg 
new file mode 100644 index 00000000..340a791c --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/graphdiff_11.svg @@ -0,0 +1,218 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_08/graphdiff_13.svg b/docs_vitepress/src/lectures/lecture_08/graphdiff_13.svg new file mode 100644 index 00000000..e0ce3685 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/graphdiff_13.svg @@ -0,0 +1,259 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_08/graphdiff_14.svg b/docs_vitepress/src/lectures/lecture_08/graphdiff_14.svg new file mode 100644 index 00000000..d3d94047 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/graphdiff_14.svg @@ -0,0 +1,370 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_08/graphdiff_14.tex b/docs_vitepress/src/lectures/lecture_08/graphdiff_14.tex new file mode 100644 index 
00000000..3480a16c --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/graphdiff_14.tex @@ -0,0 +1,69 @@ +\documentclass{standalone} + +\usepackage{tikz} + +\usepackage{verbatim} +\usetikzlibrary{arrows,shapes} + +\begin{document} +\input{style.tex} + +\begin{tikzpicture}[scale=1.5] + % draw the vertices + \foreach \pos/\label/\name in { + {(1,5)/z/$z$}, + {(1,0)/x/$x$}, + {(0,1)/y/$y$}, + {(1,2)/h1/$h_1 = \sin$}, + {(0,3)/h2/$h_2 = \cdot$}, + {(1,4)/h3/$h_3 = +$}} \node[forward] (\label) at \pos {\name}; + + % \foreach \pos/\label/\name/ in { + % {(1,2)/h1/$h_1 = \sin$}, + % {(0,3)/h2/$h_2 = \cdot$}, + % {(1,4)/h3/$h_3 = +$}} \node[forward] (\label) at \pos {\name}; + % \foreach \pos/\label/\name/\decor in { + % {(1,2)/h1/$\sin$/$h_1$}, + % {(0,3)/h2/$\cdot$/$h_2$}, + % {(1,4)/h3/$+$/$h_3$}} \node[forward,label=above:{\decor}] (\label) at \pos {\name}; + + \foreach \pos/\label/\name in { + {(2.5,2)/dh3dh1/$\frac{\partial h_3}{\partial h_1}$}, + {(3.5,2)/dzdh1/$\frac{\partial z}{\partial h_1}$}, + {(4.5,1)/dh2dy/$\frac{\partial h_2}{\partial y}$}, + {(4.5,0)/dxdx/$\frac{\partial h_2}{\partial x}+\frac{\partial h_1}{\partial x}$}, + {(6,1)/dzdy/$\frac{\partial z}{\partial y}$}, + {(6,0)/dzdx/$\frac{\partial z}{\partial x}$}, + {(3.5,3)/dzdh2/$\frac{\partial z}{\partial h_2}$}, + {(2.5,3)/dh3dh2/$\frac{\partial h_3}{\partial h_2}$}, + {(2.5,4)/dzdh3/$\frac{\partial z}{\partial h_3}$}} \node[reverse] (\label) at \pos {\name}; + + % and the edges + \foreach \source/\dest in { + %{h3/dh3dh1}, + %{h3/dh3dh2}, + {dzdh1/dzdx}, + {dzdh2/dzdx}, + {dzdh2/dzdy}, + {y/dh2dy}, + {x/dxdx}, + {dxdx/dzdx}, + {dh2dy/dzdy}, + {dh3dh1/dzdh1}, + {dzdh3/dzdh1}, + {h1/dh3dh1}, + {h2/dh3dh2}, + {dzdh3/dzdh2}, + {dh3dh2/dzdh2}, + {h3/dzdh3}, + {h1/h3}, + {h2/h3}, + {x/h1}, + {y/h2}, + {h3/z}, + {x/h2}} \path[edge] (\source) -- (\dest); + +\end{tikzpicture} + +\end{document} + diff --git a/docs_vitepress/src/lectures/lecture_08/graphdiff_6.svg 
b/docs_vitepress/src/lectures/lecture_08/graphdiff_6.svg new file mode 100644 index 00000000..525604b2 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/graphdiff_6.svg @@ -0,0 +1,140 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_08/graphdiff_6.tex b/docs_vitepress/src/lectures/lecture_08/graphdiff_6.tex new file mode 100644 index 00000000..23dd94ed --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/graphdiff_6.tex @@ -0,0 +1,35 @@ +\documentclass{standalone} + +\usepackage{tikz} + +\usepackage{verbatim} +\usetikzlibrary{arrows,shapes} + + +\begin{document} + +\input{style.tex} + +\begin{tikzpicture}[scale=1.5] + % draw the vertices + \foreach \pos/\label/\name in { + {(1,5)/z/$z$}, + {(1,0)/x/$x$}, + {(0,1)/y/$y$}, + {(1,2)/h1/$h_1 = \sin$}, + {(0,3)/h2/$h_2 = \cdot$}, + {(1,4)/h3/$h_3 = +$}} \node[forward] (\label) at \pos {\name}; + + % and the edges + \foreach \source/\dest in { + {h1/h3}, + {h2/h3}, + {h3/z}, + {x/h1}, + {y/h2}, + {x/h2}} \path[edge] (\source) -- (\dest); + +\end{tikzpicture} + +\end{document} + diff --git a/docs_vitepress/src/lectures/lecture_08/graphdiff_7.svg b/docs_vitepress/src/lectures/lecture_08/graphdiff_7.svg new file mode 100644 index 00000000..53160ff2 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/graphdiff_7.svg @@ -0,0 +1,172 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/docs_vitepress/src/lectures/lecture_08/graphdiff_7.tex b/docs_vitepress/src/lectures/lecture_08/graphdiff_7.tex new file mode 100644 index 00000000..6858d85d --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/graphdiff_7.tex @@ -0,0 +1,40 @@ +\documentclass{standalone} + +\usepackage{tikz} + +\usepackage{verbatim} +\usetikzlibrary{arrows,shapes} + + +\begin{document} +\input{style.tex} + +\begin{tikzpicture}[scale=1.5] + % draw the vertices + \foreach \pos/\label/\name in { + {(1,5)/z/$z$}, + {(1,0)/x/$x$}, + {(0,1)/y/$y$}, + {(1,2)/h1/$h_1 = \sin$}, + {(0,3)/h2/$h_2 = \cdot$}, + {(1,4)/h3/$h_3 = +$}} \node[forward] (\label) at \pos {\name}; + + + + \foreach \pos/\label/\name in { + {(2.5,4)/dzdh3/$\frac{\partial z}{\partial h_3}$}} \node[reverse] (\label) at \pos {\name}; + + % and the edges + \foreach \source/\dest in { + {h3/dzdh3}, + {h1/h3}, + {h2/h3}, + {x/h1}, + {y/h2}, + {h3/z}, + {x/h2}} \path[edge] (\source) -- (\dest); + +\end{tikzpicture} + +\end{document} + diff --git a/docs_vitepress/src/lectures/lecture_08/graphdiff_9.svg b/docs_vitepress/src/lectures/lecture_08/graphdiff_9.svg new file mode 100644 index 00000000..d30f5185 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/graphdiff_9.svg @@ -0,0 +1,218 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_08/graphdiff_9.tex b/docs_vitepress/src/lectures/lecture_08/graphdiff_9.tex new file mode 100644 index 00000000..2f98c412 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/graphdiff_9.tex @@ -0,0 +1,43 @@ 
+\documentclass{standalone} + +\usepackage{tikz} + +\usepackage{verbatim} +\usetikzlibrary{arrows,shapes} + + +\begin{document} +\input{style.tex} + +\begin{tikzpicture}[scale=1.5] + % draw the vertices + \foreach \pos/\label/\name in { + {(1,5)/z/$z$}, + {(1,0)/x/$x$}, + {(0,1)/y/$y$}, + {(1,2)/h1/$h_1 = \sin$}, + {(0,3)/h2/$h_2 = \cdot$}, + {(1,4)/h3/$h_3 = +$}} \node[forward] (\label) at \pos {\name}; + + \foreach \pos/\label/\name in { + {(3.5,3)/dzdh2/$\frac{\partial z}{\partial h_2}$}, + {(2.5,3)/dh3dh2/$\frac{\partial h_3}{\partial h_2}$}, + {(2.5,4)/dzdh3/$\frac{\partial z}{\partial h_3}$}} \node[reverse] (\label) at \pos {\name}; + + % and the edges + \foreach \source/\dest in { + {h2/dh3dh2}, + {dzdh3/dzdh2}, + {dh3dh2/dzdh2}, + {h3/dzdh3}, + {h1/h3}, + {h2/h3}, + {x/h1}, + {y/h2}, + {h3/z}, + {x/h2}} \path[edge] (\source) -- (\dest); + +\end{tikzpicture} + +\end{document} + diff --git a/docs_vitepress/src/lectures/lecture_08/hw.md b/docs_vitepress/src/lectures/lecture_08/hw.md new file mode 100644 index 00000000..02272218 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/hw.md @@ -0,0 +1,103 @@ +# [Homework 08](@id hw08) + +In this homework you will write an additional rule for our scalar reverse AD +from the lab. For this homework, please write all your code in one file `hw.jl` +which you have to zip and upload to BRUTE as usual. The solution to the lab is +below. + +```@example hw08 +mutable struct TrackedReal{T<:Real} + data::T + grad::Union{Nothing,T} + children::Dict + # this field is only need for printing the graph. you can safely remove it. + name::String +end + +track(x::Real,name="") = TrackedReal(x,nothing,Dict(),name) + +function Base.show(io::IO, x::TrackedReal) + t = isempty(x.name) ? 
"(tracked)" : "(tracked $(x.name))" + print(io, "$(x.data) $t") +end + +function accum!(x::TrackedReal) + if isnothing(x.grad) + x.grad = sum(w*accum!(v) for (v,w) in x.children) + end + x.grad +end + +function Base.:*(a::TrackedReal, b::TrackedReal) + z = track(a.data * b.data, "*") + a.children[z] = b.data # dz/da=b + b.children[z] = a.data # dz/db=a + z +end + +function Base.:+(a::TrackedReal{T}, b::TrackedReal{T}) where T + z = track(a.data + b.data, "+") + a.children[z] = one(T) + b.children[z] = one(T) + z +end + +function Base.sin(x::TrackedReal) + z = track(sin(x.data), "sin") + x.children[z] = cos(x.data) + z +end + +function gradient(f, args::Real...) + ts = track.(args) + y = f(ts...) + y.grad = 1.0 + accum!.(ts) +end +``` + +We will use it to compute the derivative of the Babylonian square root. + +```@example hw08 +babysqrt(x, t=(1+x)/2, n=10) = n==0 ? t : babysqrt(x, (t+x/t)/2, n-1) +nothing # hide +``` + +In order to differentiate through `babysqrt` you will need a reverse rule for `/` +for `Base.:/(TrackedReal,TrackedReal)` as well as the cases where you divide with +constants involved (e.g. `Base.:/(TrackedReal,Real)`). + +::: danger Homework (2 points) + +Write the reverse rules for `/` and the missing rules for `+` such that you can +differentiate through division and addition with and without constants. + +::: + +```@setup hw08 +function Base.:/(a::TrackedReal, b::TrackedReal) + z = track(a.data / b.data) + a.children[z] = 1/b.data + b.children[z] = -a.data / b.data^2 + z +end +function Base.:/(a::TrackedReal, b::Real) + z = track(a.data/b) + a.children[z] = 1/b + z +end + +function Base.:+(a::Real, b::TrackedReal{T}) where T + z = track(a + b.data, "+") + b.children[z] = one(T) + z +end +Base.:+(a::TrackedReal,b::Real) = b+a +``` + +You can verify your solution with the `gradient` function.
+ +```@repl hw08 +gradient(babysqrt, 2.0) +1/(2babysqrt(2.0)) +``` diff --git a/docs_vitepress/src/lectures/lecture_08/lab.md b/docs_vitepress/src/lectures/lecture_08/lab.md new file mode 100644 index 00000000..470d8275 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/lab.md @@ -0,0 +1,711 @@ +# Lab 08 - Reverse Mode Differentiation + +![descend](gd-path.gif) + +In the lecture you have seen how to implement *forward-mode* automatic +differentiation (AD). Assume you want to find the derivative $\frac{df}{dx}$ of +the function $f:\mathbb R^2 \rightarrow \mathbb R$ + +```@example lab08 +f(x,y) = x*y + sin(x) +nothing # hide +``` + +If we have rules for `*`, `+`, and `sin` we could simply *seed* the function with +`Dual(x,one(x))` and read out the derivative $\frac{df}{dx}$ from the `Dual` that is returned +by `f`. If we are also interested in the derivative $\frac{df}{dy}$ we will have +to run `f` again, this time seeding the second argument with `Dual(y,one(y))`. +Hence, we have to evaluate `f` *twice* if we want derivatives w.r.t to both its +arguments which means that forward differentiation scales as $O(N)$ where $N$ is +the number of inputs to `f`. + +```julia +dfdx = f(Dual(x,one(x)), Dual(y,zero(y))) +dfdy = f(Dual(x,zero(x)), Dual(y,one(y))) +``` + +*Reverse-mode* AD can compute gradients of functions with many inputs and one +output in one go. This is great because very often we want to optimize loss +functions which are exactly that: Functions with many input variables and one +loss output. + +## Reverse Mode AD + +With functions $f:\mathbb R^N\rightarrow\mathbb R^M$ and $g:\mathbb +R^L\rightarrow \mathbb R^N$ with an input vector $\mathbf{x}$ we can define the +composition of $f$ and $g$ as + +```math +\mathbf{z} = (f \circ g)(\mathbf{x}), \qquad \text{where} \qquad \mathbf{y}=g(\mathbf{x}), \qquad \mathbf{z} = f(\mathbf{y}). 
+``` + +The multivariate chainrule reads + +```math +\left.\frac{\partial z_i}{\partial x_j}\right|_{\mathbf{x}} = + \sum_{k=1}^N \left.\frac{\partial z_i}{\partial y_k}\right|_{\mathbf{y}} + \left.\frac{\partial y_k}{\partial x_i}\right|_{\mathbf{x}} +``` + +If you want to read about where this comes from you can check +[here](https://math.stackexchange.com/questions/3785018/intuitive-proof-of-the-multivariable-chain-rule) +or [here](https://people.math.harvard.edu/~shlomo/docs/Advanced_Calculus.pdf). +It is essentially one row of the *Jacobian matrix* $J$. +Note that in order to compute the derivative we always have to know the input +to the respective function, because we can only compute the derivative *at a specific point* +(denoted by the $|_x$ $_{}$ notation). For our example + +```math +z = f(x,y) = xy + \sin(x) +``` + +with the sub-functions $g(x,y)=xy$ and $h(x)=\sin(x)$ we get + +```math +\left.{\frac {df}{dx}}\right|_{x,y} + = \left.{\frac {df}{dg}}\right|_{g(x,y)}\cdot \left.{\frac {dg}{dx}}\right|_{x,y} + + \left.{\frac {df}{dh}}\right|_{h(x)}\cdot \left.{\frac {dh}{dx}}\right|_{x} + = 1 \cdot y |_{y} + 1\cdot\cos(x)|_{x}. +``` + +You can see that, in order to implement reverse-mode AD we have to trace and +remember all inputs to our intermediate functions during the forward pass such +that we can compute their gradients during the backward pass. The simplest way +of doing this is by dynamically building a computation graph which tracks how +each input variable affects its output variables. The graph below represents +the computation of our function `f`. + +```julia +z = x*y + sin(x) + +# as a Wengert list # Partial derivatives +a = x*y # da/dx = y; da/dy = x +b = sin(x) # db/dx = cos(x) +z = a + b # dz/da = 1; dz/db = 1 +``` + +![graph](graph.png) + +In the graph you can see that the variable `x` can directly affect `b` and `a`. +Hence, `x` has two children `a` and `b`. 
During the forward pass we build the +graph, keeping track of which input affects which output. Additionally we +include the corresponding local derivatives (which we can already compute). +To implement a dynamically built graph we can introduce a new number type +`TrackedReal` which has three fields: +* `data` contains the value of this node in the computation graph as obtained + in the forward pass. +* `grad` is initialized to `nothing` and will later hold the accumulated gradients (the sum in the multivariate chain rule) +* `children` is a `Dict` that keeps track which output variables are affected + by the current node and also stores the corresponding local derivatives + $\frac{\partial f}{\partial g_k}$. + +```@example lab08 +mutable struct TrackedReal{T<:Real} + data::T + grad::Union{Nothing,T} + children::Dict + # this field is only need for printing the graph. you can safely remove it. + name::String +end + +track(x::Real,name="") = TrackedReal(x,nothing,Dict(),name) + +function Base.show(io::IO, x::TrackedReal) + t = isempty(x.name) ? "(tracked)" : "(tracked $(x.name))" + print(io, "$(x.data) $t") +end +``` + +The backward pass is nothing more than the application of the chainrule. To +compute the derivative. Assuming we know how to compute the *local derivatives* +$\frac{\partial f}{\partial g_k}$ for simple functions +such as `+`, `*`, and `sin`, we can write a simple function that implements +the gradient accumulation from above via the chainrule + +```math +\left.\frac{\partial f}{\partial x_i}\right|_{\mathbf{x}} = + \sum_{k=1}^N \left.\frac{\partial f}{\partial g_k}\right|_{\mathbf{g}(\mathbf{x})} + \left.\frac{\partial g_k}{\partial x_i}\right|_{\mathbf{x}}. 
+``` + +We just have to loop over all children, collect the local derivatives, and +recurse: + +```@example lab08 +function accum!(x::TrackedReal) + if isnothing(x.grad) + x.grad = sum(w*accum!(v) for (v,w) in x.children) + end + x.grad +end +nothing # hide +``` + +where `w` corresponds to $\frac{\partial f}{\partial g_k}$ and `accum!(v)` corresponds +to $\frac{\partial g_k}{\partial x_i}$. At this point we have already implemented +the core functionality of our first reverse-mode AD! The only thing left to do +is implement the reverse rules for basic functions. Via recursion the +chainrule is applied until we arrive at the final output `z`. This final +output has to be seeded (just like with forward-mode) with $\frac{\partial +z}{\partial z}=1$. + + +### Writing Reverse Rules + +Lets start by overloading the three functions `+`, `*`, and `sin` that we need +to build our computation graph. First, we have to track the forward computation +and then we *register* the output `z` as a child of its inputs by using `z` +as a key in the dictionary of children. The corresponding value holds the +derivatives, in the case of multiplication case we simply have + +```math +z = a \cdot b +``` + +for which the derivatives are + +```math +\frac{\partial z}{\partial a}=b, \qquad +\frac{\partial z}{\partial b}=a. +``` + +Knowing the derivatives of `*` at a given point we can write our reverse rule + +```@example lab08 +function Base.:*(a::TrackedReal, b::TrackedReal) + z = track(a.data * b.data, "*") + a.children[z] = b.data # dz/da=b + b.children[z] = a.data # dz/db=a + z +end +``` + +Creating two tracked numbers and adding them results in + +```@repl lab08 +x = track(2.0) +y = track(3.0) +z = x*y +x.children +y.children +``` + + +::: warning Exercise + + +Implement the two remaining rules for `+` and `sin` by overloading the +appropriate methods like we did for `*`. 
First you have to compute the tracked +forward pass, and then register the local derivatives in the children of your +input variables. Remember to return the tracked result of the forward pass in +the end. + +::: + +::: details Show solution + +```@example lab08 +function Base.:+(a::TrackedReal{T}, b::TrackedReal{T}) where T + z = track(a.data + b.data, "+") + a.children[z] = one(T) + b.children[z] = one(T) + z +end + +function Base.sin(x::TrackedReal) + z = track(sin(x.data), "sin") + x.children[z] = cos(x.data) + z +end +``` + +::: + + +### Forward & Backward Pass + +To visualize that with reverse-mode AD we really do save computation we can +visualize the computation graph at different stages. We start with the forward +pass and keep observing `x` +```@setup lab08 +using AbstractTrees +AbstractTrees.children(v::TrackedReal) = v.children |> keys |> collect +function AbstractTrees.printnode(io::IO,v::TrackedReal) + print(io,"$(v.name) data: $(round(v.data,digits=2)) grad: $(v.grad)") +end +``` +```@repl lab08 +x = track(2.0,"x"); +y = track(3.0,"y"); +a = x*y; +print_tree(x) +``` +We can see that we `x` now has one child `a` which has the value `2.0*3.0==6.0`. All the +gradients are still `nothing`. Computing another value that depends on `x` +will add another child. +```@repl lab08 +b = sin(x) +print_tree(x) +``` +In the final step we compute `z` which does not mutate the children of `x` +because it does not depend directly on it. The result `z` is added as a child +to both `a` and `b`. +```@repl lab08 +z = a + b +print_tree(x) +``` + +For the backward pass we have to seed the initial gradient value of `z` and +call `accum!` on the variable that we are interested in. +```@repl lab08 +z.grad = 1.0 +dx = accum!(x) +dx ≈ y.data + cos(x.data) +``` + +By accumulating the gradients for `x`, the gradients in the sub-tree connected +to `x` will be evaluated. The parts of the tree that are only connected to `y` +stay untouched. 
+```@repl lab08 +print_tree(x) +print_tree(y) +``` +If we now accumulate the gradients over `y` we re-use the gradients that are already +computed. In larger computations this will save us *a lot* of effort! + +::: tip + +This also means that we have to re-build the graph for every new set of inputs! + +::: + +### Optimizing 2D Functions + +::: warning Exercise + +Implement a function `gradient(f, args::Real...)` which takes a function `f` +and its corresponding arguments (as `Real` numbers) and outputs the corresponding +gradients + +::: + +::: details Show solution + +```@example lab08 +function gradient(f, args::Real...) + ts = track.(args) + y = f(ts...) + y.grad = 1.0 + accum!.(ts) +end +nothing # hide +``` + +::: + +```@repl lab08 +f(x,y) = x*y + sin(x) +gradient(f, 2.0, 3.0) +``` + +As an example we can find a local minimum of the function `g` (slightly +modified to show you that we can now actually do *automatic* differentiation). + +```@example lab08 +g(x,y) = y*y + sin(x) + +using Plots +color_scheme = cgrad(:RdYlBu_5, rev=true) +contour(-4:0.1:4, -2:0.1:2, g, fill=true, c=color_scheme, xlabel="x", ylabel="y") +``` + +We can find a local minimum of $g$ by starting at an initial point $(x_0,y_0)$ +and taking small steps in the opposite direction of the gradient + +```math +\begin{align} +x_{i+1} &= x_i - \lambda \frac{\partial f}{\partial x_i} \\ +y_{i+1} &= y_i - \lambda \frac{\partial f}{\partial y_i}, +\end{align} +``` + +where $\lambda$ is the learning rate that has to be tuned manually. + +::: warning Exercise + +Implement a function `descend` performs one step of Gradient Descent (GD) on a +function `f` with an arbitrary number of inputs. For GD you also have to +specify the learning rate $\lambda$ so the function signature should look like +this + +```julia +descend(f::Function, λ::Real, args::Real...) +``` + +::: + +::: details Show solution + +```@example lab08 +function descend(f::Function, λ::Real, args::Real...) + Δargs = gradient(f, args...) 
+ args .- λ .* Δargs +end +nothing # hide +``` + +::: + + +Running one `descend` step should result in two new inputs with a smaller output +for `g` + +```@repl lab08 +g(1.0, 1.0) +(x,y) = descend(g, 0.2, 1.0, 1.0) +g(x,y) +``` + +You can `minimize` `g` starting from an initial value. Below is a code +snippet that performs a number of `descend` steps on two different initial +points and creates an animation of each step of the GD algorithm. + +```julia +function minimize(f::Function, args::T...; niters=20, λ=0.01) where T<:Real + paths = ntuple(_->Vector{T}(undef,niters), length(args)) + for i in 1:niters + args = descend(f, λ, args...) + @info f(args...) + for j in 1:length(args) + paths[j][i] = args[j] + end + end + paths +end + +xs1, ys1 = minimize(g, 1.5, -2.4, λ=0.2, niters=34) +xs2, ys2 = minimize(g, 1.8, -2.4, λ=0.2, niters=16) + +p1 = contour(-4:0.1:4, -2:0.1:2, g, fill=true, c=color_scheme, xlabel="x", ylabel="y") +scatter!(p1, [xs1[1]], [ys1[1]], mc=:black, marker=:star, ms=7, label="Minimum") +scatter!(p1, [xs2[1]], [ys2[1]], mc=:black, marker=:star, ms=7, label=false) +scatter!(p1, [-π/2], [0], mc=:red, marker=:star, ms=7, label="Initial Point") +scatter!(p1, xs1[1:1], ys1[1:1], mc=:black, label="GD Path", xlims=(-4,4), ylims=(-2,2)) + +@gif for i in 1:max(length(xs1), length(xs2)) + if i <= length(xs1) + scatter!(p1, xs1[1:i], ys1[1:i], mc=:black, lw=3, xlims=(-4,4), ylims=(-2,2), label=false) + end + if i <= length(xs2) + scatter!(p1, xs2[1:i], ys2[1:i], mc=:black, lw=3, label=false) + end + p1 +end +``` + +![descend](gd-path.gif) + +--- + +At this point you can move to the [homework](@ref hw08) of this lab. If you want to +know how to generalize this simple reverse AD to work with functions that +operate on `Array`s, feel free to continue with the remaining **voluntary part +of the lab**.
+ +--- + +## Naively Vectorized Reverse AD + +A naive solution to use our `TrackedReal` number type to differentiate +functions that operate on vectors is to just use `Array{<:TrackedReal}`. +Unfortunately, this means that we have to replace the fast BLAS matrix +operations with our own matrix multiplication methods that know how to deal +with `TrackedReal`s. This results in large performance hits and your task +during the rest of the lab is to implement a smarter solution to this problem. + +```@example lab08 +using LinearAlgebra +Base.zero(::TrackedReal{T}) where T = TrackedReal(zero(T)) +LinearAlgebra.adjoint(x::TrackedReal) = x +track(x::Array) = track.(x) +accum!(xs::Array{<:TrackedReal}) = accum!.(xs) + +const VecTracked = AbstractVector{<:TrackedReal} +const MatTracked = AbstractMatrix{<:TrackedReal} + +LinearAlgebra.dot(xs::VecTracked, ys::VecTracked) = mapreduce(*, +, xs, ys) +Base.:*(X::MatTracked, y::VecTracked) = map(x->dot(x,y), eachrow(X)) +Base.:*(X::MatTracked, Y::MatTracked) = mapreduce(y->X*y, hcat, eachcol(Y)) +Base.sum(xs::AbstractArray{<:TrackedReal}) = reduce(+,xs) + +function reset!(x::TrackedReal) + x.grad = nothing + reset!.(keys(x.children)) + x.children = Dict() +end + +X = rand(2,3) +Y = rand(3,2) + +function run() + Xv = track(X) + Yv = track(Y) + z = sum(Xv * Yv) + z.grad = 1.0 + accum!(Yv) +end +``` + +```julia +julia> using BenchmarkTools + +julia> @benchmark run() +BenchmarkTools.Trial: 10000 samples with 1 evaluation. + Range (min … max): 44.838 μs … 8.404 ms ┊ GC (min … max): 0.00% … 98.78% + Time (median): 48.680 μs ┊ GC (median): 0.00% + Time (mean ± σ): 53.048 μs ± 142.403 μs ┊ GC (mean ± σ): 4.61% ± 1.71% + + ▃▆█▃ + ▂▁▁▂▂▃▆████▇▅▄▄▄▄▄▅▅▄▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂ ▃ + 44.8 μs Histogram: frequency by time 66.7 μs < + + Memory estimate: 26.95 KiB, allocs estimate: 392. 
+``` + +## Reverse AD with `TrackedArray`s + +To make use of the much faster BLAS methods we have to implement a custom array +type which will offload the heavy matrix multiplications to the normal matrix +methods. Start with a **fresh REPL** and possibly a **new file** that only +contains the definition of our `TrackedReal`: + +```@example diff +mutable struct TrackedReal{T<:Real} + data::T + grad::Union{Nothing,T} + children::Dict +end + +track(x::Real) = TrackedReal(x, nothing, Dict()) +nothing # hide +``` + +::: warning Exercise + +Define a new `TrackedArray` type which subtypes and `AbstractArray{T,N}` and contains +the three fields: `data`, `grad`, and `children`. Which type should `grad` have? + +Additionally define `track(x::Array)`, and forward `size`, `length`, and `eltype` +to `x.data` (maybe via metaprogrammming? ;). + +::: + +::: details Show solution + +```@example diff +mutable struct TrackedArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N} + data::A + grad::Union{Nothing,A} + children::Dict +end + +track(x::Array) = TrackedArray(x, nothing, Dict()) +track(x::Union{TrackedArray,TrackedReal}) = x + +for f in [:size, :length, :eltype] + eval(:(Base.$(f)(x::TrackedArray, args...) = $(f)(x.data, args...))) +end + +# only needed for hashing in the children dict... +Base.getindex(x::TrackedArray, args...) = getindex(x.data,args...) + +# pretty print TrackedArray +Base.show(io::IO, x::TrackedArray) = print(io, "Tracked $(x.data)") +Base.print_array(io::IO, x::TrackedArray) = Base.print_array(io, x.data) +``` + +::: + +Creating a `TrackedArray` should work like this: + +```@repl diff +track(rand(2,2)) +``` + +```@example diff +function accum!(x::Union{TrackedReal,TrackedArray}) + if isnothing(x.grad) + x.grad = sum(λ(accum!(Δ)) for (Δ,λ) in x.children) + end + x.grad +end +``` + +To implement the first rule for `*` i.e. matrix multiplication we would first +have to derive it. 
In the case of general matrix multiplication (which is a function +$(R^{N\times M}, R^{M\times L}) \rightarrow R^{N\times L}$) we are not dealing +with simple derivatives anymore, but with a so-called *pullback* which takes a +*wobble* in the output space $R^{N\times L}$ and returns a *wiggle* in the input space +(either $R^{N\times M}$ or $R^{M\times L}$). + +Luckily +[`ChainRules.jl`](https://juliadiff.org/ChainRulesCore.jl/dev/arrays.html) has +a nice guide on how to derive array rules, so we will only state the solution +for the reverse rule such that you can implement it. They read: + +```math +\bar A = \bar\Omega B^T, \qquad \bar B = A^T\bar\Omega +``` + +Where $\bar\Omega$ is the given output *wobble*, which in the simplest case can +be the seeded value of the final node. The crucial problem to note here is that +the two rules rely in $\bar\Omega$ being multiplied *from different sides*. +This information would be lost if would just store $B^T$ as the pullback for +$A$. Hence we will store our pullbacks as closures: + +```julia +Ω̄ -> Ω̄ * B' +Ω̄ -> A' * Ω̄ +``` + +::: warning Exercise + +Define the pullback for matrix multiplication i.e. `Base.:*(A::TrackedArray,B::TrackedArray)` +by computing the primal and storing the partials as closures. + +::: + +::: details Show solution + +```@example diff +function Base.:*(X::TrackedArray, Y::TrackedArray) + Z = track(X.data * Y.data) + X.children[Z] = Δ -> Δ * Y.data' + Y.children[Z] = Δ -> X.data' * Δ + Z +end +``` + +::: + +```@repl diff +X = rand(2,3) |> track +Y = rand(3,2) |> track +Z = X*Y +f = X.children[Z] +Ω̄ = ones(size(Z)...) +f(Ω̄) +Ω̄*Y.data' +``` + +::: warning Exercise + +Implement rules for `sum`, `+`, `-`, and `abs2`. + +::: + +::: details Show solution + +```@example diff +function Base.sum(x::TrackedArray) + z = track(sum(x.data)) + x.children[z] = Δ -> Δ*ones(eltype(x), size(x)...) 
+ z +end + +function Base.:+(X::TrackedArray, Y::TrackedArray) + Z = track(X.data + Y.data) + X.children[Z] = Δ -> Δ + Y.children[Z] = Δ -> Δ + Z +end + +function Base.:-(X::TrackedArray, Y::TrackedArray) + Z = track(X.data - Y.data) + X.children[Z] = Δ -> Δ + Y.children[Z] = Δ -> -Δ + Z +end + +function Base.abs2(x::TrackedArray) + y = track(abs2.(x.data)) + x.children[y] = Δ -> Δ .* 2x.data + y +end +``` + +::: + +```@example diff +X = rand(2,3) +Y = rand(3,2) +function run() + Xv = track(X) + Yv = track(Y) + z = sum(Xv * Yv) + z.grad = 1.0 + accum!(Yv) +end +nothing # hide +``` + +```julia +julia> using BenchmarkTools + +julia> @benchmark run() +BenchmarkTools.Trial: 10000 samples with 6 evaluations. + Range (min … max): 5.797 μs … 1.618 ms ┊ GC (min … max): 0.00% … 98.97% + Time (median): 6.530 μs ┊ GC (median): 0.00% + Time (mean ± σ): 7.163 μs ± 22.609 μs ┊ GC (mean ± σ): 4.42% ± 1.40% + + ▆█▇▇▇▆▅▄▃▃▂▂▂▁▁ ▁▁ ▂ + █████████████████████▇▇▇▆▆▅▅▅▅▆▅▄▅▅▄▁▃▁▁▄▁▃▁▁▁▃▃▄▁▁▁▄▁▃▁▅▄ █ + 5.8 μs Histogram: log(frequency) by time 15.8 μs < + + Memory estimate: 3.08 KiB, allocs estimate: 31. +``` + +Even for this tiny example we are already 10 times faster than with the naively +vectorized approach! + +In order to implement a full neural network we need two more rules. One for +the non-linearity and one for concatentation of individual training points to +a batch. + +```@example diff +σ(x::Real) = 1/(1+exp(-x)) +σ(x::AbstractArray) = σ.(x) +function σ(x::TrackedArray) + z = track(σ(x.data)) + d = z.data + x.children[z] = Δ -> Δ .* d .* (1 .- d) + z +end + +function Base.hcat(xs::TrackedArray...) + y = track(hcat(data.(xs)...)) + stops = cumsum([size(x,2) for x in xs]) + starts = vcat([1], stops[1:end-1] .+ 1) + for (start,stop,x) in zip(starts,stops,xs) + x.children[y] = function (Δ) + δ = if ndims(x) == 1 + Δ[:,start] + else + ds = map(_ -> :, size(x)) |> Base.tail |> Base.tail + Δ[:, start:stop, ds...] 
+ end
+ δ
+ end
+ end
+ y
+end
+```
+
+You can see a full implementation of our tracing based AD [here](https://github.com/JuliaTeachingCTU/Scientific-Programming-in-Julia/blob/master/src/ReverseDiff.jl)
+and a simple implementation of a Neural Network that can learn an approximation
+to the function `g` [here](https://github.com/JuliaTeachingCTU/Scientific-Programming-in-Julia/blob/master/docs/src/lecture_08/reverse-nn.jl). Running the latter script will produce an animation
+that shows how the network is learning.
+
+![anim](anim.gif)
+
+This lab is heavily inspired by [Rufflewind](https://rufflewind.com/2016-12-30/reverse-mode-automatic-differentiation)
diff --git a/docs_vitepress/src/lectures/lecture_08/lecture.md b/docs_vitepress/src/lectures/lecture_08/lecture.md
new file mode 100644
index 00000000..d62c2553
--- /dev/null
+++ b/docs_vitepress/src/lectures/lecture_08/lecture.md
@@ -0,0 +1,685 @@
+```@setup lec08
+using Plots
+```
+# Automatic Differentiation

+## Motivation

+- AD supports a lot of modern machine learning by allowing quick differentiation of complex mathematical functions. The 1st order optimization methods are ubiquitous in finding parameters of functions (not only in deep learning).
+- AD is interesting to study from both the mathematical and the implementation perspective, since different approaches come with different trade-offs. Julia offers many implementations (some of them are not maintained anymore), as it turned out that implementing a (simple) AD is relatively easy.
+- We (authors of this course) believe that it is good to understand (at least roughly) how the methods work in order to use them effectively in your work.
+- Julia is unique in the effort separating definitions of AD rules from AD engines that use those rules to perform the AD and the backend which executes the rules. This allows authors of generic libraries to add new rules that would be compatible with many frameworks. See [juliadiff.org](https://juliadiff.org/) for a list. 
+
+## Theory
+
+Differentiation is a routine process, as most of the time we break complicated functions down into small pieces that we know how to differentiate, and from those we assemble the gradient of the complex function back. Thus, the essential piece is the differentiation of the composed function ``f: \mathbb{R}^n \rightarrow \mathbb{R}^m``
+
+```math
+f(x) = f_1(f_2(f_3(\ldots f_n(x)))) = (f_1 \circ f_2 \circ \ldots \circ f_n)(x)
+```
+
+which is computed by the chain rule. Before we dive into the details, let's define the notation, which for the sake of clarity needs to be precise. The gradient of function ``f(x)`` with respect to ``x`` at point ``x_0`` is denoted as
+``\left.\frac{\partial f}{\partial x}\right|_{x^0}``
+
+For a composed function ``f(x)`` the gradient with respect to ``x`` at point ``x_0`` is equal to
+
+```math
+\left.\frac{\partial f}{\partial x}\right|_{x^0} = \left.\frac{\partial f_1}{\partial y_1}\right|_{y_1^0} \times \left.\frac{\partial f_2}{\partial y_2}\right|_{y_2^0} \times \ldots \times \left.\frac{\partial f_n}{\partial y_n}\right|_{y_n^0},
+```
+
+where ``y_i`` denotes the input of function ``f_i`` and
+
+```math
+\begin{alignat*}{2}
+y_i^0 = &\ \left(f_{i+1} \circ \ldots \circ f_n\right) (x^0) \\
+y_n^0 = &\ x^0 \\
+y_0^0 = &\ f(x^0) \\
+\end{alignat*}
+```
+
+What does ``\left.\frac{\partial f_i}{\partial y_i}\right|_{y_i^0}`` look like?
+
+- If ``f_i: \mathbb{R} \rightarrow \mathbb{R}``, then ``\frac{\partial f_i}{\partial y_i} \in \mathbb{R}`` is a real number and we live in a high-school world, where it was sufficient to multiply real numbers.
+- If ``f_i: \mathbb{R}^{m_i} \rightarrow \mathbb{R}^{n_i}``, then ``\mathbf{J}_i = \left.\frac{\partial f_i}{\partial y_i}\right|_{y_i^0} \in \mathbb{R}^{n_i,m_i}`` is a matrix with ``n_i`` rows and ``m_i`` columns.
+
+The computation of the gradient ``\frac{\partial f}{\partial x}`` *theoretically* boils down to
+ 1. computing Jacobians ``\left\{\mathbf{J}_i\right\}_{i=1}^n``
+ 2. 
multiplication of Jacobians as it holds that ``\left.\frac{\partial f}{\partial x}\right|_{y_0} = J_1 \times J_2 \times \ldots \times J_n``. + +The complexity of the computation (at least one part of it) is therefore determined by the Matrix multiplication, which is generally expensive, as theoretically it has complexity at least ``O(n^{2.3728596}),`` but in practice a little bit more as the lower bound hides the devil in the ``O`` notation. The order in which the Jacobians are multiplied has therefore a profound effect on the complexity of the AD engine. While determining the optimal order of multiplication of sequence of matrices is costly, in practice, we recognize two important cases. + +1. Jacobians are multiplied from right to left as ``J_1 \times (J_2 \times ( \ldots \times (J_{n-1} \times J_n) \ldots))`` which has the advantage when the input dimension of ``f: \mathbb{R}^n \rightarrow \mathbb{R}^m`` is smaller than the output dimension, ``n < m``. - referred to as the **FORWARD MODE** +2. Jacobians are multiplied from left to right as ``( \ldots ((J_1 \times J_2) \times J_3) \times \ldots ) \times J_n`` which has the advantage when the input dimension of ``f: \mathbb{R}^n \rightarrow \mathbb{R}^m`` is larger than the output dimension, ``n > m``. - referred to as the **BACKWARD MODE** + +The ubiquitous in machine learning to minimization of a scalar (loss) function of a large number of parameters. Also notice that for `f` of certain structures, it pays-off to do a mixed-mode AD, where some parts are done using forward diff and some parts using reverse diff. + +### Example + +Let's workout an example + +```math +z = xy + sin(x) +``` + +How it maps to the notation we have used above? Particularly, what are ``f_1, f_2, \ldots, f_n`` and the corresponding ``\{y_i\}_{i=1}^n``, such that ``(f_1 \circ f_2 \circ \ldots \circ f_n)(x,y) = xy + sin(x)`` ? 
+ +```math +\begin{alignat*}{6} +f_1:&\mathbb{R}^2 \rightarrow \mathbb{R} \quad&f_1(y_1)& = y_{1,1} + y_{1,2} \quad & y_0 = & (xy + \sin(x)) \\ +f_2:&\mathbb{R}^3 \rightarrow \mathbb{R}^2 \quad&f_2(y_2)& = (y_{2,1}y_{2,2}, y_{2,3}) \quad & y_1 = & (xy, \sin(x))&\\ +f_3:& \mathbb{R}^2 \rightarrow \mathbb{R}^3 \quad&f_3(y_3)& = (y_{3,1}, y_{3,2}, \sin(y_{3,1})) \quad & y_2 =& (x, y, \sin(x))\\ +\end{alignat*} +``` + +The corresponding jacobians are + +```math +\begin{alignat*}{4} +f_1(y_1) & = y_{1,1} + y_{1,2} \quad & \mathbf{J}_1& = \begin{bmatrix} 1 \\ 1 \end{bmatrix} \\ +f_2(y_2) & = (y_{2,1}y_{2,2}, y_{2,3}) \quad & \mathbf{J}_2& = \begin{bmatrix} y_{2, 2} & 0 \\ y_{2,1} & 0 \\ 0 & 1 \end{bmatrix}\\ +f_3(y_3) & = (y_{3,1}, y_{3,2}, \sin(y_{3,1})) \quad & \mathbf{J}_3 & = \begin{bmatrix} 1 & 0 & \cos(y_{3,1}) \\ 0 & 1 & 0 \end{bmatrix} \\ +\end{alignat*} +``` + +and for the gradient it holds that + +```math +\begin{bmatrix} \frac{\partial f(x, y)}{\partial{x}} \\ \frac{\partial f(x,y)}{\partial{y}} \end{bmatrix} = \mathbf{J}_3 \times \mathbf{J}_2 \times \mathbf{J}_1 = \begin{bmatrix} 1 & 0 & \cos(x) \\ 0 & 1 & 0 \end{bmatrix} \\ \times \begin{bmatrix} y & 0 \\ x & 0 \\ 0 & 1 \end{bmatrix} \times \begin{bmatrix} 1 \\ 1 \end{bmatrix} = \begin{bmatrix} y & \cos(x) \\ x & 0 \end{bmatrix} \times \begin{bmatrix} 1 \\ 1 \end{bmatrix} = \begin{bmatrix} y + \cos(x) \\ x \end{bmatrix} +``` + +Note that from theoretical point of view this decomposition of a function is not unique, however as we will see later it usually given by the computational graph in a particular language/environment. + +## Calculation of the Forward mode + +In theory, we can calculate the gradient using forward mode as follows +Initialize the Jacobian of ``y_n`` with respect to ``x`` to an identity matrix, because as we have stated above ``y^0_n = x``, i.e. ``\frac{\partial y_n}{\partial x} = \mathbb{I}``. 
+Iterate `i` from `n` down to `1` as + +- calculate the next intermediate output as ``y^0_{i-1} = f_i({y^0_i})`` +- calculate Jacobian ``J_i = \left.\frac{f_i}{\partial y_i}\right|_{y^0_i}`` +- *push forward* the gradient as ``\left.\frac{\partial y_{i-1}}{\partial x}\right|_x = J_i \times \left.\frac{\partial y_i}{\partial x}\right|_x`` + +Notice that + +- on the very end, we are left with ``y = y^0_0`` and with ``\frac{\partial y_0}{\partial x}``, which is the gradient we wanted to calculate; +- if `y` is a scalar, then ``\frac{\partial y_0}{\partial x}`` is a matrix with single row +- the Jacobian and the output of the function is calculated in one sweep. + +The above is an idealized computation. The real implementation is a bit different, as we will see later. + +### Implementation of the forward mode using Dual numbers + +Forward modes need to keep track of the output of the function and of the derivative at each computation step in the computation of the complicated function ``f``. This can be elegantly realized with a [**dual number**](https://en.wikipedia.org/wiki/Dual_number), which are conceptually similar to complex numbers, but instead of the imaginary number ``i`` dual numbers use ``\epsilon`` in its second component: + +```math +x = v + \dot v \epsilon, +``` + +where ``(v,\dot v) \in \mathbb R`` and by definition ``\epsilon^2=0`` (instead +of ``i^2=-1`` in complex numbers). What are the properties of these Dual numbers? 
+ +```math +\begin{align} +(v + \dot v \epsilon) + (u + \dot u \epsilon) &= (v + u) + (\dot v + \dot u)\epsilon \\ +(v + \dot v \epsilon)(u + \dot u \epsilon) &= vu + (u\dot v + \dot u v)\epsilon + \dot v \dot u \epsilon^2 = vu + (u\dot v + \dot u v)\epsilon \\ +\frac{v + \dot v \epsilon}{u + \dot u \epsilon} &= \frac{v + \dot v \epsilon}{u + \dot u \epsilon} \frac{u - \dot u \epsilon}{u - \dot u \epsilon} = \frac{v}{u} - \frac{(\dot u v - u \dot v)\epsilon}{u^2} +\end{align} +``` + +#### How are dual numbers related to differentiation? + +Let's evaluate the above equations at ``(v, \dot v) = (v, 1)`` and ``(u, \dot u) = (u, 0)`` +we obtain + +```math +\begin{align} +(v + \dot v \epsilon) + (u + \dot u \epsilon) &= (v + u) + 1\epsilon \\ +(v + \dot v \epsilon)(u + \dot u \epsilon) &= vu + u\epsilon\\ +\frac{v + \dot v \epsilon}{u + \dot u \epsilon} &= \frac{v}{u} + \frac{1}{u} \epsilon +\end{align} +``` + +and notice that terms ``(1, u, \frac{1}{u})`` corresponds to gradient of functions ``(u+v, uv, \frac{v}{u})`` with respect to ``v``. We can repeat it with changed values of ``\epsilon`` as ``(v, \dot v) = (v, 0)`` and ``(u, \dot u) = (u, 1)`` +and we obtain + +```math +\begin{align} +(v + \dot v \epsilon) + (u + \dot u \epsilon) &= (v + u) + 1\epsilon \\ +(v + \dot v \epsilon)(u + \dot u \epsilon) &= vu + v\epsilon\\ +\frac{v + \dot v \epsilon}{u + \dot u \epsilon} &= \frac{v}{u} - \frac{v}{u^2} \epsilon +\end{align} +``` + +meaning that at this moment we have obtained gradients with respect to ``u``. + +All above functions ``(u+v, uv, \frac{u}{v})`` are of ``\mathbb{R}^2 \rightarrow \mathbb{R}``, therefore we had to repeat the calculations twice to get gradients with respect to both inputs. This is inline with the above theory, where we have said that if input dimension is larger then output dimension, the backward mode is better. 
But consider a case, where we have a function + +```math +f(v) = (v + 5, 5*v, 5 / v) +``` + +which is ``\mathbb{R} \rightarrow \mathbb{R}^3``. In this case, we obtain the Jacobian ``[1, 5, -\frac{5}{v^2}]`` in a single forward pass (whereas the reverse would require three passes over the backward calculation, as will be seen later). + +#### Does dual numbers work universally? +Let's first work out polynomial. Let's assume the polynomial + +```math +p(v) = \sum_{i=1}^n p_iv^i +``` + +and compute its value at ``v + \dot v \epsilon`` (note that we know how to do addition and multiplication) + +```math +\begin{split} +p(v) &= + \sum_{i=0}^n p_i(v + \dot{v} \epsilon )^i = + \sum_{i=0}^n \left[p_i \sum_{j=0}^{n}\binom{i}{j}v^{i-j}(\dot v \epsilon)^{i}\right] = + p_0 + \sum_{i=1}^n \left[p_i \sum_{j=0}^{1}\binom{i}{j}v^{i-j}(\dot v \epsilon)^{j}\right] = \\ + &= p_0 + \sum_{i=1}^n p_i(v^i + i v^{i-1} \dot v \epsilon ) + = p(v) + \left(\sum_{i=1}^n ip_i v^{i-1}\right) \dot v \epsilon +\end{split} +``` + +where in the multiplier of ``\dot{v} \epsilon``: ``\sum_{i=1}^n ip_i v^{i - 1}``, we recognize the derivative of ``p(v)`` with respect to ``v``. This proves that Dual numbers can be used to calculate the gradient of polynomials. + +Let's now consider a general function ``f:\mathbb{R} \rightarrow \mathbb{R}``. Its value at point ``v + \dot v \epsilon`` can be approximated using Taylor expansion at function at point ``v`` as + +```math +f(v+\dot v \epsilon) = \sum_{i=0}^\infty \frac{f^i(v)\dot v^i\epsilon^i}{i!} + = f(v) + f'(v)\dot v\epsilon, +``` + +where all higher order terms can be dropped because ``\epsilon^i=0`` for ``i>1``. This shows that we can calculate the gradient of ``f`` at point ``v`` by calculating its value at ``f(v + \epsilon)`` and taking the multiplier of ``\epsilon``. 
+ +#### Implementing Dual number with Julia + +To demonstrate the simplicity of Dual numbers, consider following definition of Dual numbers, where we define a new number type and overload functions `+`, `-`, `*`, and `/`. In Julia, this reads: + +```@example lec08 +struct Dual{T<:Number} <: Number + x::T + d::T +end + +Base.:+(a::Dual, b::Dual) = Dual(a.x+b.x, a.d+b.d) +Base.:-(a::Dual, b::Dual) = Dual(a.x-b.x, a.d-b.d) +Base.:/(a::Dual, b::Dual) = Dual(a.x/b.x, (a.d*b.x - a.x*b.d)/b.x^2) # recall (a/b) = a/b + (a'b - ab')/b^2 ϵ +Base.:*(a::Dual, b::Dual) = Dual(a.x*b.x, a.d*b.x + a.x*b.d) + +# Let's define some promotion rules +Dual(x::S, d::T) where {S<:Number, T<:Number} = Dual{promote_type(S, T)}(x, d) +Dual(x::Number) = Dual(x, zero(typeof(x))) +Dual{T}(x::Number) where {T} = Dual(T(x), zero(T)) +Base.promote_rule(::Type{Dual{T}}, ::Type{S}) where {T<:Number,S<:Number} = Dual{promote_type(T,S)} +Base.promote_rule(::Type{Dual{T}}, ::Type{Dual{S}}) where {T<:Number,S<:Number} = Dual{promote_type(T,S)} + +# and define api for forward differentionation +forward_diff(f::Function, x::Real) = _dual(f(Dual(x,1.0))) +_dual(x::Dual) = x.d +_dual(x::Vector) = _dual.(x) +``` + +And let's test the **_Babylonian Square Root_** (an algorithm to compute $\sqrt x$): + +```@repl lec08 +babysqrt(x, t=(1+x)/2, n=10) = n==0 ? 
t : babysqrt(x, (t+x/t)/2, n-1)
+
+forward_diff(babysqrt, 2)
+forward_diff(babysqrt, 2) ≈ 1/(2sqrt(2))
+forward_diff(x -> [1 + x, 5x, 5/x], 2)
+```
+
+We now compare the analytic solution to the values computed by `forward_diff` and by the finite differencing
+
+```math
+f(x) = \sqrt{x} \qquad f'(x) = \frac{1}{2\sqrt{x}}
+```
+
+```@repl lec08
+using FiniteDifferences
+forward_dsqrt(x) = forward_diff(babysqrt,x)
+analytc_dsqrt(x) = 1/(2babysqrt(x))
+forward_dsqrt(2.0)
+analytc_dsqrt(2.0)
+central_fdm(5, 1)(babysqrt, 2.0)
+```
+
+```@example lec08
+plot(0.0:0.01:2, babysqrt, label="f(x) = babysqrt(x)", lw=3)
+plot!(0.1:0.01:2, analytc_dsqrt, label="Analytic f'", ls=:dot, lw=3)
+plot!(0.1:0.01:2, forward_dsqrt, label="Dual Forward Mode f'", lw=3, ls=:dash)
+```
+
+---
+
+### Takeaways
+
+1. Forward mode $f'$ is obtained simply by pushing a `Dual` through `babysqrt`
+2. To make the forward diff work in Julia, we only need to **_overload_** a few **_operators_** for forward mode AD to work on **_any function._** Therefore the approach is called operator overloading.
+3. For vector-valued functions we can use [**_Hyperduals_**](http://adl.stanford.edu/hyperdual/)
+4. Forward diff can differentiate through `setindex!` (called each time an element is assigned to a place in an array, e.g. `x = [1,2,3]; x[2] = 1`)
+5. ForwardDiff is implemented in [`ForwardDiff.jl`](https://github.com/JuliaDiff/ForwardDiff.jl), which might appear to be neglected, but the truth is that it is a very stable and general implementation.
+6. ForwardDiff does not have to be implemented through Dual numbers. It can be implemented similarly to ReverseDiff through multiplication of Jacobians, which is what the community is working on now (in [`Diffractor`](https://github.com/JuliaDiff/Diffractor.jl), [`Zygote`](https://github.com/FluxML/Zygote.jl) with rules defined in [`ChainRules`](https://github.com/JuliaDiff/ChainRules.jl)). 
+ +--- + +## Reverse mode + +In reverse mode, the computation of the gradient follow the opposite order. +We initialize the computation by setting ``\mathbf{J}_0 = \frac{\partial y}{\partial y_0},`` which is again an identity matrix. Then we compute Jacobians and multiplications in the opposite order. The problem is that to calculate ``J_i`` we need to know the value of ``y_i^0``, which cannot be calculated in the reverse pass. The backward pass therefore needs to be preceded by the forward pass, where ``\{y_i^0\}_{i=1}^n`` are calculated. + +The complete reverse mode algorithm therefore proceeds as + +1. Forward pass: iterate `i` from `n` down to `1` as + - calculate the next intermediate output as ``y^0_{i-1} = f_i(y^0_i)`` +2. Backward pass: iterate `i` from `1` down to `n` as + - calculate Jacobian ``J_i = \left.\frac{f_i}{\partial y_i}\right|_{y_i^0} `` at point ``y_i^0`` + - *pull back* the gradient as ``\left.\frac{\partial f(x)}{\partial y_{i}}\right|_{y^0_i} = \left.\frac{\partial y_0}{\partial y_{i-1}}\right|_{y^0_{i-1}} \times J_i`` + +The need to store intermediate outs has a huge impact on memory requirements, which particularly on GPU is a big deal. Recall few lectures ago we have been discussing how excessive memory allocations can be damaging for performance, here we are given an algorithm where the excessive allocation is by design. + +### Tricks to decrease memory consumptions + +- Define **custom rules** over large functional blocks. For example while we can auto-grad (in theory) matrix product, it is much more efficient to define make a matrix multiplication as one large function, for which we define Jacobians (note that by doing so, we can dispatch on Blas). 
e.g + +```math +\begin{alignat*}{2} + \mathbf{C} &= \mathbf{A} * \mathbf{B} \\ + \frac{\partial{\mathbf{C}}}{\partial \mathbf{A}} &= \mathbf{B} \\ + \frac{\partial{\mathbf{C}}}{\partial \mathbf{B}} &= \mathbf{A}^{\mathrm{T}} \\ +\end{alignat*} +``` + +- When differentiating **Invertible functions**, calculate intermediate outputs from the output. This can lead to huge performance gain, as all data needed for computations are in caches. +- **Checkpointing** does not store intermediate ouputs after larger sequence of operations. When they are needed for forward pass, they are recalculated on demand. + +Most reverse mode AD engines does not support mutating values of arrays (`setindex!` in julia). This is related to the memory consumption, where after every `setindex!` you need in theory save the full matrix. [`Enzyme`](https://github.com/wsmoses/Enzyme.jl) differentiating directly LLVM code supports this, since in LLVM every variable is assigned just once. ForwardDiff methods does not suffer this problem, as the gradient is computed at the time of the values. + +::: tip + +Reverse mode AD was first published in 1976 by Seppo Linnainmaa[^1], a finnish computer scientist. It was popularized in the end of 80s when applied to training multi-layer perceptrons, which gave rise to the famous **backpropagation** algorithm[^2], which is a special case of reverse mode AD. + +[^1]: Linnainmaa, S. (1976). Taylor expansion of the accumulated rounding error. *BIT Numerical Mathematics*, 16(2), 146-160. +[^2]: Rumelhart, D. E., Hinton, G. E., and Williams, R. J. (1986). Learning representations by back-propagating errors. *Nature*, 323, 533--536. + +::: + +::: tip + +The terminology in automatic differentiation is everything but fixed. The community around `ChainRules.jl` went a great length to use something reasonable. 
They use **pullback** for a function realizing the vector-Jacobian product in reverse diff, reminding us that the gradient is pulled back to the origin of the computation. They use **pushforward** to denote the same operation in forward diff, as the gradient is pushed forward through the computation. 
+
+:::
+
+## Implementation details of reverse AD
+
+Reverse-mode AD needs to record operations over variables when computing the value of a differentiated function, such that it can walk back when computing the gradient. This _record_ is called **_tape_**, but it is effectively a directed acyclic graph. The construction of the tape can be either explicit or implicit. The code computing the gradient can be produced by operator-overloading or code-rewriting techniques. This gives rise to four different takes on AD, and Julia has libraries for all four.
+
+* [`Yota.jl`](https://github.com/dfdx/Yota.jl): explicit tape, code-rewriting
+* [`Tracker.jl`](https://github.com/FluxML/Tracker.jl), [`AutoGrad.jl`](https://github.com/denizyuret/AutoGrad.jl): implicit tape, operator overloading
+* [`ReverseDiff.jl`](https://github.com/JuliaDiff/ReverseDiff.jl): explicit tape, operator overloading
+* [`Zygote.jl`](https://github.com/FluxML/Zygote.jl): implicit tape, code-rewriting
+
+
+### Graph-based AD
+
+In the graph-based approach, we start with a complete knowledge of the computation graph (which is known in many cases like classical neural networks) and augment it with nodes representing the computation of the gradient (backward path). We need to be careful to add all edges representing the flow of information needed to calculate the gradient. Once the computation graph is augmented, we can find the subgraph needed to compute the desired node(s).
+
+Recall the example from the beginning of the lecture, ``f(x, y) = \sin(x) + xy``; let's observe how the extended computational graph will look. 
The computation graph of function ``f`` looks like + +![diff graph](graphdiff_6.svg) + +where arrows ``\rightarrow`` denote the flow of operations and we have denoted the output of function ``f`` as ``z`` and outputs of intermediate nodes as ``h_i`` standing for *hidden*. + +We start from the top and add a node calculating ``\frac{\partial z}{\partial h_3}`` which is an identity, needed to jump-start the differentiation. + +![diff graph](graphdiff_7.svg) + +We connect it with the output of ``h_3``, even though technically in this case it is not needed, as the ``z = h_3``. +We then add a node calculating ``\frac{\partial h_3}{\partial h_2}`` for which we only need information about ``h_2`` and mark it in the graph (again, this edge can be theoretically dropped due to being equal to one regardless the inputs). Following the chain rule, we need to combine ``\frac{\partial h_3}{\partial h_2}`` with ``\frac{\partial z}{\partial h_3}`` to compute ``\frac{\partial z}{\partial h_2}`` which we note in the graph. + +![diff graph](graphdiff_9.svg) + +We continue with the same process with ``\frac{\partial h_3}{\partial h_1}``, which we again combine with ``\frac{\partial z}{\partial h_1}`` to obtain ``\frac{\partial z}{\partial h_1}``. Continuing the reverse diff we obtain the final graph + +![diff graph](graphdiff_14.svg) + +containing the desired nodes ``\frac{\partial z}{\partial x}`` and ``\frac{\partial z}{\partial y}``. This computational graph can be passed to the compiler to compute desired values. + +This approach to AD has been taken for example by [Theano](https://github.com/Theano/Theano) and by [TensorFlow](https://www.tensorflow.org/). In Tensorflow when you use functions like `tf.mul( a, b )` or `tf.add(a,b)`, you are not performing the computation in Python, but you are building the computational graph shown as above. 
You can then compute the values using `tf.run` with a desired inputs, but you are in fact computing the values in a different interpreter / compiler then in python. + +Advantages: + +- Knowing the computational graph in advance is great, as you can do expensive optimization steps to simplify the graph. +- The computational graph have a simple semantics (limited support for loops, branches, no objects), and the compiler is therefore simpler than the compiler of full languages. +- Since the computation of gradient augments the graph, you can run the process again to obtain higher order gradients. +- TensorFlow allows you to specialize on sizes of Tensors, which means that it knows precisely how much memory you will need and where, which decreases the number of allocations. This is quite important in GPU. + +Disadvantages: + +- You are restricted to fixed computation graph. It is generally difficult to implement `if` or `while`, and hence to change the computation according to values computed during the forward pass. +- Development and debugging can be difficult, since you are not developing the computation graph in the host language. +- Exploiting within computation graph parallelism might be difficult. + +Comments: + +- [DaggerFLux.jl](https://github.com/FluxML/DaggerFlux.jl) use this approach to perform model-based paralelism, where parts of the computation graph (and especially parameters) can reside on different machines. +- [Umlaut.jl](https://github.com/dfdx/Umlaut.jl) allows to easily obtain the tape through _tracing_ of the execution of a function, which can be then used to implement the AD as described above (see [Yota's documentation](https://dfdx.github.io/Yota.jl/dev/design/) for complete example). 
+ +```@repl lec08 +using Umlaut +g(x, y) = x * y +f(x, y) = g(x, y)+sin(x) +tape = trace(f, 1.0, 2.0)[2] +``` + +`Yota.jl` use the tape to generate the gradient as + +```@repl lec08 +tape = Yota.gradtape(f, 1.0, 2.0; seed=1.0) +Umlaut.to_expr(tape) +``` + +### Tracking-based AD + +Alternative to static-graph based methods are methods, which builds the graph during invocation of functions and then use this dynamically built graph to know, how to compute the gradient. The dynamically built graph is frequently called *tape*. This approach is used by popular libraries like [**_PyTorch_**](https://pytorch.org/), [**_AutoGrad_**](https://github.com/HIPS/autograd), and [**_Chainer_**](https://chainer.org/) in Python ecosystem, or by [**_Tracker.jl_**](https://github.com/FluxML/Tracker.jl) (`Flux.jl`'s former AD backend), [**_ReverseDiff.jl_**](https://github.com/JuliaDiff/ReverseDiff.jl), and [**_AutoGrad.jl_**](https://github.com/denizyuret/AutoGrad.jl) (`Knet.jl`'s AD backend) in Julia. This type of AD systems is also called *operator overloading*, since in order to record the operations performed on the arguments we need to replace/wrap the original implementation. + +How do we build the tracing? Let's take a look what `ReverseDiff.jl` is doing. It defines `TrackedArray` (it also defines `TrackedReal`, but `TrackedArray` is more interesting) as + +```julia +struct TrackedArray{T,N,V<:AbstractArray{T,N}} <: AbstractArray{T,N} + value::V + deriv::Union{Nothing,V} + tape::Vector{Any} + string_tape::String +end +``` + +where in + +- `value` it stores the value of the array +- `deriv` will hold the gradient of the tracked array +- `tape` of will log operations performed with the tracked array, such that we can calculate the gradient as a sum of operations performed over the tape. + +What do we need to store on the tape? Let's denote as ``a`` the current `TrackedArray`. 
The gradient with respect to some output ``z`` is equal to ``\frac{\partial z}{\partial a} = \sum_{g_i} \frac{\partial z}{\partial g_i} \times \frac{\partial g_i}{\partial a}`` where ``g_i`` is the output of any function (in the computational graph) where ``a`` was a direct input. The `InstructionTape` will therefore contain a reference to ``g_i`` (which has to be of `TrackedArray` and where we know ``\frac{\partial z}{\partial g_i}`` will be stored in `deriv` field) and we also need to a method calculating ``\frac{\partial g_i}{\partial a}``, which can be stored as an anonymous function will accepting the grad as an argument. + +```julia +TrackedArray(a::AbstractArray, string_tape::String = "") = TrackedArray(a, similar(a) .= 0, [], string_tape) +TrackedMatrix{T,V} = TrackedArray{T,2,V} where {T,V<:AbstractMatrix{T}} +TrackedVector{T,V} = TrackedArray{T,1,V} where {T,V<:AbstractVector{T}} +Base.show(io::IO, ::MIME"text/plain", a::TrackedArray) = show(io, a) +Base.show(io::IO, a::TrackedArray) = print(io, "TrackedArray($(size(a.value)))") +value(A::TrackedArray) = A.value +value(A) = A +track(A, string_tape = "") = TrackedArray(A, string_tape) +track(a::Number, string_tape) = TrackedArray(reshape([a], 1, 1), string_tape) + +import Base: +, * +function *(A::TrackedMatrix, B::TrackedMatrix) + a, b = value.((A, B)) + C = TrackedArray(a * b, "($(A.string_tape) * $(B.string_tape))") + push!(A.tape, (C, ∂C -> ∂C * b')) + push!(B.tape, (C, ∂C -> a' * ∂C)) + C +end + +function *(A::TrackedMatrix, B::AbstractMatrix) + a, b = value.((A, B)) + C = TrackedArray(a * b, "($(A.string_tape) * B)") + push!(A.tape, (C, ∂C -> ∂C * b')) + C +end + +function *(A::Matrix, B::TrackedMatrix) + a, b = value.((A, B)) + C = TrackedArray(a * b, "A * $(B.string_tape)") + push!(A.tape, (C, ∂C -> ∂C * b')) + C +end + +function +(A::TrackedMatrix, B::TrackedMatrix) + C = TrackedArray(value(A) + value(B), "($(A.string_tape) + $(B.string_tape))") + push!(A.tape, (C, ∂C -> ∂C)) + push!(B.tape, (C, 
∂C -> ∂C)) + C +end + +function msin(A::TrackedMatrix) + a = value(A) + C = TrackedArray(sin.(a), "sin($(A.string_tape))") + push!(A.tape, (C, ∂C -> cos.(a) .* ∂C)) + C +end +``` + +Let's observe that the operations are recorded on the tape as they should + +```julia +a = rand() +b = rand() +A = track(a, "A") +B = track(b, "B") +# R = A * B + msin(A) +C = A * B +A.tape +B.tape +C.string_tape +R = C + msin(A) +A.tape +B.tape +R.string_tape +``` + +Let's now implement a function that will recursively calculate the gradient of a term of interest. It goes over its childs, if they not have calculated the gradients, calculate it, otherwise it adds it to its own after if not, ask them to calculate the gradient and otherwise + +```julia +function accum!(A::TrackedArray) + isempty(A.tape) && return(A.deriv) + A.deriv .= sum(g(accum!(r)) for (r, g) in A.tape) + empty!(A.tape) + A.deriv +end +``` + +We can calculate the gradient by initializing the gradient of the result to vector of ones simulating the `sum` function + +```julia +using FiniteDifferences +R.deriv .= 1 +accum!(A)[1] +∇a = grad(central_fdm(5,1), a -> a*b + sin(a), a)[1] +A.deriv[1] ≈ ∇a +accum!(B)[1] +∇b = grad(central_fdm(5,1), b -> a*b + sin(a), b)[1] +B.deriv[1] ≈ ∇b +``` + +The api function for computing the grad might look like + +```julia +function trackedgrad(f, args...) + args = track.(args) + o = f(args...) + fill!(o.deriv, 1) + map(accum!, args) +end +``` + +where we should assert that the output dimension is 1. In our implementation we dirtily expect the output of f to be summed to a scalar. + +Let's compare the results to those computed by FiniteDifferences + +```julia +A = rand(4,4) +B = rand(4,4) +trackedgrad(A -> A * B + msin(A), A)[1] +grad(central_fdm(5,1), A -> sum(A * B + sin.(A)), A)[1] +trackedgrad(A -> A * B + msin(A), B)[1] +grad(central_fdm(5,1), A -> sum(A * B + sin.(A)), B)[1] +``` + +To make the above AD system really useful, we would need to + +1. 
Add support for `TrackedReal`, which is straightforward (we might skip the anonymous function, as the derivative of a scalar function is always a number). +2. We would need to add a lot of rules, how to work with basic values. This is why the the approach is called **operator overloading** since you need to overload a lot of functions (or methods or operators). + +For example to add all combinations for `+`, we would need to add following rules. + +```julia +function +(A::TrackedMatrix, B::TrackedMatrix) + C = TrackedArray(value(A) + value(B), "($(A.string_tape) + $(B.string_tape))") + push!(A.tape, (C, ∂C -> ∂C )) + push!(B.tape, (C, ∂C -> ∂C)) + C +end + +function +(A::AbstractMatrix, B::TrackedMatrix) + C = TrackedArray(A * value(B), "(A + $(B.string_tape))") + push!(B.tape, (C, ∂C -> ∂C)) + C +end +``` + +Advantages: + +- Debugging and development is nicer, as AD is implemented in the same language. +- The computation graph, tape, is dynamic, which makes it simpler to take the gradient in the presence of `if` and `while`. + +Disadvantages: + +- The computation graph is created and differentiated during every computation, which might be costly. In most deep learning applications, this overhead is negligible in comparison to time of needed to perform the operations itself (`ReverseDiff.jl` allows to compile the tape). +- The compiler has limited options for optimization, since the tape is created during the execution. +- Since computation graph is dynamic, it cannot be optimized as the static graph, the same holds for the memory allocations. + +A more complete example which allow to train feed-forward neural network on GPU can be found [here](ffnn.jl). + +::: tip + +The difference between tracking and graph-based AD systems is conceptually similar to interpreted and compiled programming languages. Tracking AD systems interpret the time while computing the gradient, while graph-based AD systems compile the computation of the gradient. 
+ +::: + +## ChainRules + +From our discussions about AD systems so far we see that while the basic, *engine*, part is relatively straightforward, the devil is in writing the rules prescribing the computation of gradients. These rules are needed for every system whether it is graph based, tracking, or Wengert list based. ForwardDiff also needs a rule system, but rules are a bit different (as they are pushing the gradient forward rather than pulling it back). It is obviously a waste of effort for each AD system to have its own set of rules. Therefore the community (initiated by Catherine Frames White backed by [Invenia](https://github.com/invenia)) have started to work on a unified system to express differentiation rules, such that they can be shared between systems. So far, they are supported by `Zygote.jl`, `Nabla.jl`, `ReverseDiff.jl` and `Diffractor.jl`, suggesting that the unification approach is working (but not by `Enzyme.jl`). + +The definition of reverse diff rules follows the idea we have nailed above (we refer readers interested in forward diff rules [to official documentation](https://juliadiff.org/ChainRulesCore.jl)). + +`ChainRules` defines the reverse rules for function `foo` in a function `rrule` with the following signature + +```julia +function rrule(::typeof(foo), args...; kwargs...) + ... + return y, pullback +end +``` + +where + +- the first argument `::typeof(foo)` allows to dispatch on the function for which the rules is written +- the output of function `foo(args...)` is returned as the first argument +- `pullback(Δy)` takes the gradient of upstream functions with respect to the output of `foo(args)` and returns it multiplied by the jacobian of the output of `foo(args)` with respect to parameters of the function itself (recall the function can have parameters, as it can be a closure or a functor), and with respect to the arguments. + +```julia +function pullback(Δy) + ... + return ∂self, ∂args... 
+end +``` + +Notice that key-word arguments are not differentiated. This is a design decision with the explanation that parametrize the function, but most of the time, they are not differentiable. + +`ChainRules.jl` provides support for lazy (delayed) computation using `Thunk`. Its argument is a function, which is not evaluated until `unthunk` is called. There is also a support to signal that gradient is zero using `ZeroTangent` (which can save valuable memory) or to signal that the gradient does not exist using `NoTangent`. + +How can we use ChainRules to define rules for our AD system? +Let's first observe the output + +```julia +using ChainRulesCore, ChainRules +r, g = rrule(*, rand(2,2), rand(2,2)) +g(r) +``` + +With that, we can extend our AD system as follows + +```julia +import Base: *, +, - + +for f in [:*, :+, :-] + @eval function $(f)(A::TrackedMatrix, B::AbstractMatrix) + C, pullback = rrule($(f), value(A), B) + C = track(C) + push!(A.tape, (C, Δ -> pullback(Δ)[2])) + C + end + + @eval function $(f)(A::AbstractMatrix, B::TrackedMatrix) + C, pullback = rrule($(f), A, value(B)) + C = track(C) + push!(B.tape, (C, Δ -> pullback(Δ)[3])) + C + end + + @eval function $(f)(A::TrackedMatrix, B::TrackedMatrix) + C, pullback = rrule($(f), value(A), value(B)) + C = track(C) + push!(A.tape, (C, Δ -> pullback(Δ)[2])) + push!(B.tape, (C, Δ -> pullback(Δ)[3])) + C + end +end +``` + +and we need to modify our `accum!` code to `unthunk` if needed + +```julia +function accum!(A::TrackedArray) + isempty(A.tape) && return(A.deriv) + A.deriv .= sum(unthunk(g(accum!(r))) for (r, g) in A.tape) +end +``` + +```julia +A = rand(4,4) +B = rand(4,4) +grad(A -> (A * B + msin(A))*B, A)[1] +gradient(A -> sum(A * B + sin.(A)), A)[1] +grad(A -> A * B + msin(A), B)[1] +gradient(A -> sum(A * B + sin.(A)), B)[1] +``` + +## Source-to-source AD using Wengert + +Recall the compile stages of julia and look, how the lowered code for + +```julia +f(x,y) = x*y + sin(x) +``` + +looks like + 
+```julia +julia> @code_lowered f(1.0, 1.0) +CodeInfo( +1 ─ %1 = x * y +│ %2 = Main.sin(x) +│ %3 = %1 + %2 +└── return %3 +) +``` + +This form is particularly nice for automatic differentiation, as we have on the left hand side always a single variable, which means the compiler has provided us with a form, on which we know, how to apply AD rules. + +What if we somehow be able to talk to the compiler and get this form from him? + +* [simplest viable implementation](https://juliadiff.org/ChainRulesCore.jl/dev/autodiff/operator_overloading.html#ReverseDiffZero) + +### Sources for this lecture + +* Mike Innes' [diff-zoo](https://github.com/MikeInnes/diff-zoo) +* [Write Your Own StS in One Day](https://blog.rogerluo.me/2019/07/27/yassad/) +* [Build your own AD with Umlaut](https://dfdx.github.io/Yota.jl/dev/design/) +* [Zygote.jl Paper](https://arxiv.org/pdf/1810.07951.pdf) + and [Zygote.jl Internals](https://fluxml.ai/Zygote.jl/dev/internals/) +* Keno's [Talk](https://www.youtube.com/watch?v=mQnSRfseu0c&feature=youtu.be) +* Chris' [Lecture](https://mitmath.github.io/18337/lecture11/adjoints) +* [Automatic-Differentiation-Based-on-Computation-Graph](https://liebing.org.cn/2019/07/22/Automatic-Differentiation-Based-on-Computation-Graph/) diff --git a/docs_vitepress/src/lectures/lecture_08/reverse-nn.jl b/docs_vitepress/src/lectures/lecture_08/reverse-nn.jl new file mode 100644 index 00000000..1dd431de --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/reverse-nn.jl @@ -0,0 +1,112 @@ +using Scientific_Programming_in_Julia +using Base: tail + +import Scientific_Programming_in_Julia.ReverseDiff + +using Base: tail +struct Dense{M<:TrackedMatrix, V<:TrackedVector, F} + W::M + b::V + f::F +end + + +Dense(W::Matrix, b::Vector, f) = Dense(track(W),track(b),f) +Dense(in,out,f=identity) = Dense(rand(out,in) .-0.5, rand(out) .-0.5, f) +(m::Dense)(x) = m.f(m.W*x + m.b) + + +struct Chain{T<:Tuple} + layers::T +end +Chain(ls...) 
= Chain(ls) + +(m::Chain)(x) = applychain(m.layers, x) +applychain(ls::Tuple{}, x) = x +applychain(ls::Tuple, x) = applychain(tail(ls), first(ls)(x)) + +params(m::Dense) = (W=m.W, b=m.b) +params(m::Chain) = params(m.layers) +params(ls::Tuple) = (params(first(ls))..., params(tail(ls))...) +params(ls::Tuple{}) = ls + +function ReverseDiff.reset!(m::Chain) + map(ReverseDiff.reset!, m.layers) + nothing +end + +function ReverseDiff.reset!(m::Dense) + ReverseDiff.reset!(m.W) + ReverseDiff.reset!(m.b) + nothing +end + +function train_step!(loss, network, xs, ys; λ=0.01) + l = loss(network, xs, ys) + l.grad = 1.0 + for w in params(network) + w.data .-= λ .* accum!(w) + end + l +end + + +# task +f(x,y) = y^2 + sin(x) +f(xs) = [f(xs[1],xs[2])] + +# data +xs = map(1:30) do i + x = rand(-4:0.1:4) + y = rand(-2:0.1:2) + [x,y] +end +ys = track.(f.(xs)) +xs = track.(xs) + + +hdim = 15 +network = Chain( + Dense(2,hdim,σ), + Dense(hdim,hdim,σ), + Dense(hdim,1) +) +function loss(network,xs,ys) + errs = hcat([abs2(network(x)-y) for (x,y) in zip(xs,ys)]...) 
+ sum(errs) +end + +using Plots +color_scheme = cgrad(:RdYlBu_5, rev=true) + +forward(network,x,y) = network(track([x,y])).data[1] + +# training +anim = @animate for i in 1:2000 + ReverseDiff.reset!.(xs) + ReverseDiff.reset!.(ys) + ReverseDiff.reset!(network) + l = train_step!(loss, network, xs, ys, λ=0.003) + if mod1(i,50) == 1 + @info i l + p1 = contour(-4:0.3:4, -2:0.3:2, f, fill=true, c=color_scheme, xlabel="x", ylabel="y", title="Truth") + p2 = contour(-4:0.3:4, -2:0.3:2, (x,y)->forward(network,x,y), fill=true, c=color_scheme, xlabel="x",title="Iteration: $i") + p = plot(p1,p2,size=(1200,400)) |> display + end +end every 50 + +gif(anim, "anim.gif", fps=15) +error() + +for i in 1:20000 + ReverseDiff.reset!.(xs) + ReverseDiff.reset!.(ys) + ReverseDiff.reset!(network) + l = train_step!(loss, network, xs, ys, λ=0.002) + if mod1(i,1000) == 1 + @info i l + p1 = contour(-4:0.3:4, -2:0.3:2, f, fill=true, c=color_scheme, xlabel="x", ylabel="y", title="Truth") + p2 = contour(-4:0.3:4, -2:0.3:2, (x,y)->forward(network,x,y), fill=true, c=color_scheme, xlabel="x",title="Iteration: $i") + plot(p1,p2) |> display + end +end diff --git a/docs_vitepress/src/lectures/lecture_08/style.tex b/docs_vitepress/src/lectures/lecture_08/style.tex new file mode 100644 index 00000000..6fb0e122 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_08/style.tex @@ -0,0 +1,6 @@ +\tikzstyle{forward}=[circle,fill=black!25,minimum size=20pt,inner sep=0pt] +\tikzstyle{reverse} = [forward, fill=red!24] +\tikzstyle{edge} = [draw,thick,->] +\tikzstyle{weight} = [font=\small] +\tikzstyle{selected edge} = [draw,line width=5pt,-,red!50] +\tikzstyle{ignored edge} = [draw,line width=5pt,-,black!20] diff --git a/docs_vitepress/src/lectures/lecture_09/argcheck.jl b/docs_vitepress/src/lectures/lecture_09/argcheck.jl new file mode 100644 index 00000000..82f036b1 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/argcheck.jl @@ -0,0 +1,91 @@ +using MacroTools +using IRTools + +using IRTools: 
branches, block, empty!, evalir, func, branch!, block, IR, @dynamo, xcall + +function _mark(label, ex) + label isa Symbol || error("label has to be a Symbol") + return Expr( + :block, + Expr(:meta, :begin_optional, label), + esc(ex), + Expr(:meta, :end_optional, label), + ) +end + +macro mark(label, ex) + _mark(label, ex) +end + + +foo(x) = bar(baz(x)) + +function bar(x) + @mark print iseven(x) && println("The input is even.") + x +end + +function baz(x) + @mark print x<0 && println("The input is negative.") + x +end + + + +isbegin(e::Expr) = Meta.isexpr(e,:meta) && e.args[1]===:begin_optional +isend(e::Expr) = Meta.isexpr(e,:meta) && e.args[1]===:end_optional + + +skip(f::Core.IntrinsicFunction, args...) = f(args...) +skip(f::Core.Builtin, args...) = f(args...) + +@dynamo function skip(args...) + ir = IR(args...) + delete_line = false + local orig + + for (x,st) in ir + is_begin = isbegin(st.expr) + is_end = isend(st.expr) + + if is_begin + delete_line = true + end + + if is_begin + orig = block(ir,x) + elseif is_end + dest = block(ir,x) + if orig != dest + empty!(branches(orig)) + branch!(orig,dest) + end + end + + if delete_line + delete!(ir,x) + end + + if is_end + delete_line = false + end + + if haskey(ir,x) && Meta.isexpr(st.expr,:call) + ir[x] = IRTools.xcall(skip, st.expr.args...) + end + end + return ir +end + +function skip(ex::Expr) +end + +macro skip(ex) + ex.head == :call || error("Input expression has to be a `:call`.") + return xcall(skip, ex.args...) 
+end + +display(@code_ir foo(-2)) +display(@code_ir skip(foo,-2)) +display(foo(-2)) +@skip foo(-2) diff --git a/docs_vitepress/src/lectures/lecture_09/codeinfo.jl b/docs_vitepress/src/lectures/lecture_09/codeinfo.jl new file mode 100644 index 00000000..572d51aa --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/codeinfo.jl @@ -0,0 +1,147 @@ +using Dictionaries +include("loggingprofiler.jl") + +function retrieve_code_info(sigtypes, world = Base.get_world_counter()) + S = Tuple{map(s -> Core.Compiler.has_free_typevars(s) ? typeof(s.parameters[1]) : s, sigtypes)...} + _methods = Base._methods_by_ftype(S, -1, world) + if isempty(_methods) + @info("method $(sigtypes) does not exist") + return(nothing) + end + type_signature, raw_static_params, method = _methods[1] + mi = Core.Compiler.specialize_method(method, type_signature, raw_static_params, false) + ci = Base.isgenerated(mi) ? Core.Compiler.get_staged(mi) : Base.uncompressed_ast(method) + Base.Meta.partially_inline!(ci.code, [], method.sig, Any[raw_static_params...], 0, 0, :propagate) + ci +end + +function overdubbable(ex::Expr) + ex.head != :call && return(false) + length(ex.args) < 2 && return(false) + return(overdubbable(ex.args[1])) +end +overdubbable(gr::Core.GlobalRef) = gr.name ∉ [:overdub, :record_start, :record_end, :promote, :convert, :tuple] +# overdubbable(gr::Symbol) = +overdubbable(ex) = false +timable(ex) = overdubbable(ex) + +# +remap(ex::Expr, maps) = Expr(ex.head, remap(ex.args, maps)...) 
+remap(args::AbstractArray, maps) = map(a -> remap(a, maps), args) +remap(c::Core.GotoNode, maps) = Core.GotoNode(maps.goto[c.label]) +remap(c::Core.GotoIfNot, maps) = Core.GotoIfNot(remap(c.cond, maps), maps.goto[c.dest]) +remap(r::Core.ReturnNode, maps) = Core.ReturnNode(remap(r.val, maps)) +remap(a::Core.SlotNumber, maps) = maps.slots[a.id] +remap(a::Core.SSAValue, maps) = Core.SSAValue(maps.ssa[a.id]) +remap(a::Core.NewvarNode, maps) = Core.NewvarNode(maps.slots[a.slot.id]) +remap(a::GlobalRef, maps) = a +remap(a::QuoteNode, maps) = a +remap(ex, maps) = ex + +exportname(ex::GlobalRef) = QuoteNode(ex.name) +exportname(ex::Symbol) = QuoteNode(ex) +exportname(ex::Expr) = exportname(ex.args[1]) +exportname(i::Int) = QuoteNode(Symbol("Int(",i,")")) + +dummy() = return +function empty_codeinfo() + new_ci = code_lowered(dummy, Tuple{})[1] + empty!(new_ci.code) + empty!(new_ci.slotnames) + empty!(new_ci.linetable) + empty!(new_ci.codelocs) + new_ci +end + +overdub(f::Core.IntrinsicFunction, args...) = f(args...) +overdub(f::Core.Builtin, args...) = f(args...) + +@generated function overdub(f::F, args...) where {F} + ci = retrieve_code_info((F, args...)) + if ci === nothing + return(Expr(:call, :f, [:(args[$(i)]) for i in 1:length(args)]...)) + end + + new_ci = empty_codeinfo() + new_ci.slotnames = vcat([Symbol("#self#"), :f, :args], ci.slotnames[2:end]) + new_ci.slotflags = vcat([0x00, 0x00, 0x00], ci.slotflags[2:end]) + foreach(s -> push!(new_ci.linetable, s), ci.linetable) + + maps = ( + ssa = Dict{Int, Int}(), + slots = Dict{Int, Any}(), + goto = Dict{Int,Int}(), + ) + + #we need to map indexes of slot-variables from ci to their new values. 
+ # except the first one, we just remap them + maps.slots[1] = Core.SlotNumber(1) + foreach(i -> maps.slots[i] = Core.SlotNumber(i + 2), 2:length(ci.slotnames)) # they are shifted by 2 accomondating inserted `f` and `args` + + #if somewhere the original parameters of the functions will be used + #they needs to be remapped to an SSAValue from here, since the overdubbed + # function has signatures overdub(f, args...) instead of f(x,y,z...) + newci_no = 0 + for i in 1:length(args) + newci_no +=1 + push!(new_ci.code, Expr(:call, Base.getindex, Core.SlotNumber(3), i)) + maps.slots[i+1] = Core.SSAValue(newci_no) + push!(new_ci.codelocs, ci.codelocs[1]) + end + + for (ci_no, ex) in enumerate(ci.code) + if timable(ex) + fname = exportname(ex) + push!(new_ci.code, Expr(:call, GlobalRef(LoggingProfiler, :record_start), fname)) + push!(new_ci.codelocs, ci.codelocs[ci_no]) + newci_no += 1 + maps.goto[ci_no] = newci_no + # if overdubbable(ex) + # ex = Expr(:call, GlobalRef(Main, :overdub), ex.args...) + # end + push!(new_ci.code, ex) + push!(new_ci.codelocs, ci.codelocs[ci_no]) + newci_no += 1 + maps.ssa[ci_no] = newci_no + push!(new_ci.code, Expr(:call, GlobalRef(LoggingProfiler, :record_end), fname)) + push!(new_ci.codelocs, ci.codelocs[ci_no]) + newci_no += 1 + else + push!(new_ci.code, ex) + push!(new_ci.codelocs, ci.codelocs[ci_no]) + newci_no += 1 + maps.ssa[ci_no] = newci_no + end + end + + for i in length(args)+1:length(new_ci.code) + new_ci.code[i] = remap(new_ci.code[i], maps) + end + new_ci + new_ci.inferred = false + new_ci.ssavaluetypes = length(new_ci.code) + # new_ci + return(new_ci) +end + +LoggingProfiler.reset!() +new_ci = overdub(sin, 1.0) +LoggingProfiler.to + +function foo(x, y) + z = x * y + z + sin(y) +end + + +LoggingProfiler.reset!() +overdub(foo, 1.0, 1.0) +LoggingProfiler.to + +macro record(ex) + Expr(:call, :overdub, ex.args...) 
+end + +LoggingProfiler.reset!() +@record foo(1.0, 1.0) +LoggingProfiler.to diff --git a/docs_vitepress/src/lectures/lecture_09/ircode.md b/docs_vitepress/src/lectures/lecture_09/ircode.md new file mode 100644 index 00000000..4904d7e2 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/ircode.md @@ -0,0 +1,173 @@ +These notes are from poking around `Core.Compiler` to see, how they are different from working just with `CodeInfo` and `IRTools.jl`. Notes are mainly around IRCode. Why there is a `Core.Compiler.IRCode` when there was `Core.CodeInfo`? Seems to be historical reasons. At the beginning, Julia did not have any intermediate representation and code directly emitted LLVM. Then, it has received an `CodeInfo` as in intermediate representation. `IRCode` seems like an evolution of `CodeInfo`. `Core.Compiler` works mostly with `IRCode`, but the `IRCode` can be converted to the `CodeInfo` and the other way around. `IRCode` seems to be designed more for implementation of various optimisation phases. Personal experience tells me it is much nicer to work with even on the low level. + +Throughout the explanation, we assume that `Core.Compiler` was imported as `CC` to decrease the typing load. + +Let's play with a simple silly function + +```julia + +function foo(x,y) + z = x * y + z + sin(x) +end +``` + +### IRCode + +We can obtain `CC.IRCode` + +```julia +import Core.Compiler as CC +(ir, rt) = only(Base.code_ircode(foo, (Float64, Float64), optimize_until = "compact 1")) +``` + +which returns `Core.Compiler.IRCode` in `ir` and return-type `Float64` in `rt`. +The output might look like + +```julia +julia> (ir, rt) = only(Base.code_ircode(foo, (Float64, Float64), optimize_until = "compact 1")) + 1─ %1 = (_2 * _3)::Float64 + │ %2 = Main.sin(_2)::Float64 + │ %3 = (%1 + %2)::Float64 + └── return %3 + => Float64 + +``` + +Options of `optimize_until` are `compact 1`, `compact 2`, `nothing.` I do not see a difference between `compact 2` and `compact 2`. 
+ +The IRCode structure is defined as + +```julia +struct IRCode + stmts::InstructionStream + argtypes::Vector{Any} + sptypes::Vector{VarState} + linetable::Vector{LineInfoNode} + cfg::CFG + new_nodes::NewNodeStream + meta::Vector{Expr} +end +``` + +where + +* `stmts` is a stream of instruction (more in this below) +* `argtypes` holds types of arguments of the function whose `IRCode` we have obtained +* `sptypes` is a vector of `VarState`. It seems to be related to parameters of types +* `linetable` is a table of unique lines in the source code from which statements +* `cfg` holds control flow graph, which contains building blocks and jumps between them +* `new_nodes` is an infrastructure that can be used to insert new instructions to the existing `IRCode` . The idea behind is that since insertion requires a renumbering all statements, they are put in a separate queue. They are put to correct position with a correct `SSANumber` by calling `compact!`. +* `meta` is something. + +Before going further, let's take a look on `InstructionStream` defined as + +```julia +struct InstructionStream + inst::Vector{Any} + type::Vector{Any} + info::Vector{CallInfo} + line::Vector{Int32} + flag::Vector{UInt8} +end +``` + +where + +* `inst` is a vector of instructions, stored as `Expr`essions. The allowed fields in `head` are described [here](https://docs.julialang.org/en/v1/devdocs/ast/#Expr-types) +* `type` is the type of the value returned by the corresponding statement +* `CallInfo` is ???some info??? +* `line` is an index into `IRCode.linetable` identifying from which line in source code the statement comes from +* `flag` are some flags providing additional information about the statement. 
+ - `0x01 << 0` = statement is marked as `@inbounds` + - `0x01 << 1` = statement is marked as `@inline` + - `0x01 << 2` = statement is marked as `@noinline` + - `0x01 << 3` = statement is within a block that leads to `throw` call + - `0x01` << 4 = statement may be removed if its result is unused, in particular it is thus be both pure and effect free + - `0x01 << 5-6 = ` + - `0x01 << 7 = ` has out-of-band info + +For the above `foo` function, the InstructionStream looks like + +```julia +julia> DataFrame(flag = ir.stmts.flag, info = ir.stmts.info, inst = ir.stmts.inst, line = ir.stmts.line, type = ir.stmts.type) +4×5 DataFrame + Row │ flag info inst line type + │ UInt8 CallInfo Any Int32 Any +─────┼──────────────────────────────────────────────────────────────────────── + 1 │ 112 MethodMatchInfo(MethodLookupResu… _2 * _3 1 Float64 + 2 │ 80 MethodMatchInfo(MethodLookupResu… Main.sin(_2) 2 Float64 + 3 │ 112 MethodMatchInfo(MethodLookupResu… %1 + %2 2 Float64 + 4 │ 0 NoCallInfo() return %3 2 Any +``` + +We can index into the statements as `ir.stmts[1]`, which provides a "view" into the vector. To obtain the first instruction, we can do `ir.stmts[1][:inst]`. + +The IRCode is typed, but the fields can contain `Any`. It is up to the user to provide corrrect types of the output and there is no helper functions to perform typing. A workaround is shown in the Petite Diffractor project. Julia's sections of the manual https://docs.julialang.org/en/v1/devdocs/ssair/ and seems incredibly useful. The IR form they talk about seems to be `Core.Compiler.IRCode`. + +It seems to be that it is possible to insert IR instructions into the it structure by queuing that to the field `stmts` and then call `compact!`, which would perform the heavy machinery of relabeling everything. + +#### Example of modifying the function through IRCode + +Below is an MWE that tries to modify the IRCode of a function and execute it. The goal is to change the function `foo` to `fooled`. 
+ +```julia +import Core.Compiler as CC +using Core: SSAValue, GlobalRef, ReturnNode + +function foo(x,y) + z = x * y + z + sin(x) +end + +function fooled(x,y) + z = x * y + z + sin(x) + cos(y) +end + +(ir, rt) = only(Base.code_ircode(foo, (Float64, Float64), optimize_until = "compact 1")); +nr = CC.insert_node!(ir, 2, CC.NewInstruction(Expr(:call, Core.GlobalRef(Main, :cos), Core.Argument(3)), Float64)) +nr2 = CC.insert_node!(ir, 4, CC.NewInstruction(Expr(:call, GlobalRef(Main, :+), SSAValue(3), nr), Float64)) +CC.setindex!(ir.stmts[4], ReturnNode(nr2), :inst) +ir = CC.compact!(ir) +irfooled = Core.OpaqueClosure(ir) +irfooled(1.0, 2.0) == fooled(1.0, 2.0) +``` + +So what we did? +1. `(ir, rt) = only(Base.code_ircode(foo, (Float64, Float64), optimize_until = "compact 1"))` obtain the `IRCode` of the function `foo` when called with both arguments being `Float64`. `rt` contains the return type of the +2. A new instruction `cos` is inserted to the `ir` by `Core.Compiler.insert_node!`, which takes as an argument an `IRCode`, position (2 in our case), and new instruction. The new instruction is created by `NewInstruction` accepting as an input expression `Expr` and a return type. Here, we force it to be `Float64`, but ideally it should be inferred. (This would be the next stage). Or, may-be, we can run it through type inference? . The new instruction is added to the `ir.new_nodes` instruction stream and obtain a new SSAValue returned in `nr`, which can be then used further. +3. We add one more instruction `+` that uses output of the instruction we add in step 2, `nr` and SSAValue from statement 3 of the original IR (at this moment, the IR is still numbered with respect to the old IR, the renumbering will happen later.) The output of this second instruction is returned in `nr2`. +4. Then, we rewrite the return statement to return `nr2` instead of `SSAValue(3)`. +5. 
`ir = CC.compact!(ir)` is superimportant since it moves the newly added statements from `ir.new_stmts` to `ir.stmts` and importantly renumbers `SSAValues.` *Even though the function is mutating, the mutation here is meant that the argument is changed, but the new correct IRCode is returned and therefore has to be reassigned.* +6. The function is created through `OpaqueClosure.` +7. The last line certifies that the function do what it should do. + +There is no infrastructure to make the above manipulation transparent, like is the case of @generated function and codeinfo. It is possible to hook through generated function by converting the IRCode to untyped CodeInfo, in which case you do not have to bother with typing. + +#### How to obtain code info the proper way? + +This is the way code info is obtained in the diffractor. + +```julia +mthds = Base._methods_by_ftype(sig, -1, world) +match = only(mthds) + +mi = Core.Compiler.specialize_method(match) +ci = Core.Compiler.retrieve_code_info(mi, world) +``` + +### CodeInfo + +`IRTools.jl` are great for modifying `CodeInfo`. I have found two tools for modifying `IRCode` and I wonder if they have been abandoned because they were both dead ends or because of lack of human labor. I am also aware of Also, [this](https://nbviewer.org/gist/tkf/d4734be24d2694a3afd669f8f50e6b0f/00_notebook.ipynb) is quite cool play with IRStuff. + + +Resources +* https://vchuravy.dev/talks/licm/ +* [CompilerPluginTools](https://github.com/JuliaCompilerPlugins/CompilerPluginTools.jl) +* [CodeInfoTools.jl](https://github.com/JuliaCompilerPlugins/CodeInfoTools.jl). +* TKF's [CodeInfo.jl](https://github.com/tkf/ShowCode.jl) is nice for visualization of the IRCode +* Diffractor is an awesome source of howto. 
For example function `my_insert_node!` in `src/stage1/hacks.jl` +* https://nbviewer.org/gist/tkf/d4734be24d2694a3afd669f8f50e6b0f/00_notebook.ipynb +* https://github.com/JuliaCompilerPlugins/Mixtape.jl + diff --git a/docs_vitepress/src/lectures/lecture_09/irtools.jl b/docs_vitepress/src/lectures/lecture_09/irtools.jl new file mode 100644 index 00000000..8cf76802 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/irtools.jl @@ -0,0 +1,55 @@ +using IRTools +using IRTools: var, xcall, insert!, insertafter!, func, recurse!, @dynamo +include("loggingprofiler.jl") +LoggingProfiler.resize!(LoggingProfiler.to, 10000) + +function timable(ex::Expr) + ex.head != :call && return(false) + length(ex.args) < 2 && return(false) + ex.args[1] isa Core.GlobalRef && return(true) + ex.args[1] isa Symbol && return(true) + return(false) +end +timable(ex) = false + +recursable(gr::GlobalRef) = gr.name ∉ [:profile_fun, :record_start, :record_end] +recursable(ex::Expr) = ex.head == :call && recursable(ex.args[1]) +recursable(ex) = false + +exportname(ex::GlobalRef) = QuoteNode(ex.name) +exportname(ex::Symbol) = QuoteNode(ex) +exportname(ex::Expr) = exportname(ex.args[1]) +exportname(i::Int) = QuoteNode(Symbol("Int(",i,")")) + +profile_fun(f::Core.IntrinsicFunction, args...) = f(args...) +profile_fun(f::Core.Builtin, args...) = f(args...) + +@dynamo function profile_fun(f, args...) + ir = IRTools.Inner.IR(f, args...) + for (v, ex) in ir + if timable(ex.expr) + fname = exportname(ex.expr) + insert!(ir, v, xcall(LoggingProfiler, :record_start, fname)) + insertafter!(ir, v, xcall(LoggingProfiler, :record_end, fname)) + end + end + for (x, st) in ir + recursable(st.expr) || continue + ir[x] = xcall(profile_fun, st.expr.args...) 
+ end + # recurse!(ir) + return ir +end + +macro record(ex) + esc(Expr(:call, :profile_fun, ex.args...)) +end + +function foo(x, y) + z = x * y + z + sin(y) +end + +LoggingProfiler.reset!() +@record foo(1.0, 1.0) +LoggingProfiler.to diff --git a/docs_vitepress/src/lectures/lecture_09/irtools.md.bak b/docs_vitepress/src/lectures/lecture_09/irtools.md.bak new file mode 100644 index 00000000..509a0aa2 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/irtools.md.bak @@ -0,0 +1,338 @@ + +## Zygote internals + +```julia +function pullback(f, args...) + y, back = _pullback(f, args...) + y, Δ -> tailmemaybe(back(Δ)) +end + +function gradient(f, args...) + y, back = pullback(f, args...) + grad = back(sensitivity(y)) + isnothing(grad) ? nothing : map(_project, args, grad) +end + +_pullback(f, args...) = _pullback(Context(), f, args...) + +@generated function _pullback(ctx::AContext, f, args...) + # Try using ChainRulesCore + if is_kwfunc(f, args...) + # if it is_kw then `args[1]` are the keyword args, `args[2]` is actual function + cr_T = Tuple{ZygoteRuleConfig{ctx}, args[2:end]...} + chain_rrule_f = :chain_rrule_kw + else + cr_T = Tuple{ZygoteRuleConfig{ctx}, f, args...} + chain_rrule_f = :chain_rrule + end + + hascr, cr_edge = has_chain_rrule(cr_T) + hascr && return :($chain_rrule_f(ZygoteRuleConfig(ctx), f, args...)) + + # No ChainRule, going to have to work it out. 
+ T = Tuple{f,args...} + ignore_sig(T) && return :(f(args...), Pullback{$T}(())) + + g = try + _generate_pullback_via_decomposition(T) + catch e + rethrow(CompileError(T,e)) + end + g === nothing && return :(f(args...), Pullback{$T}((f,))) + meta, forw, _ = g + argnames!(meta, Symbol("#self#"), :ctx, :f, :args) + forw = varargs!(meta, forw, 3) + # IRTools.verify(forw) + forw = slots!(pis!(inlineable!(forw))) + # be ready to swap to using chainrule if one is declared + cr_edge != nothing && edge!(meta, cr_edge) + return update!(meta.code, forw) +end +``` +## Source Code Transformation + +The most recent approach to Reverse Mode AD is **_Source-to-Source_** +transformation adopted by packages like **_JAX_** and **_Zygote.jl_**. +Transforming code promises to eliminate the problems of tracing-based AD. +`Tracked` types are not needed anymore, which reduces memory usage, promising +significant speedups. Additionally, the reverse pass becomes a *compiler +problem*, which makes it possible to leverage highly optimized compilers like +LLVM. + +Source-to-source AD uses meta-programming to produce `rrule`s for any function +that is a composition of available `rrule`s. The code for `foo` +```@example lec08 +foo(x) = h(g(f(x))) + +f(x) = x^2 +g(x) = sin(x) +h(x) = 5x +nothing # hide +``` +is transformed into +```julia eval=false +function rrule(::typeof(foo), x) + a, Ja = rrule(f, x) + b, Jb = rrule(g, a) + y, Jy = rrule(h, b) + + function dfoo(Δy) + Δb = Jy(Δy) + Δa = Jb(Δb) + Δx = Ja(Δa) + return Δx + end + + return y, dfoo +end +``` +For this simple example we can define the three `rrule`s by hand: +```@example lec08 +rrule(::typeof(f), x) = f(x), Δ -> 2x*Δ +rrule(::typeof(g), x) = g(x), Δ -> cos(x)*Δ +rrule(::typeof(h), x) = h(x), Δ -> 5*Δ +``` +Remember that this is a very artificial example. In real AD code you would +overload functions like `+`, `*`, etc, such that you don't have to define a +`rrule` for something like `5x`. 
+ +In order to transform our functions safely we will make use of `IRTools.jl` +(*Intermediate Representation Tools*) which provide some convenience functions +for inspecting and manipulating code snippets. The IR for `foo` looks like this: +```@example lec08 +using IRTools: @code_ir, evalir +ir = @code_ir foo(2.) +``` +```@setup lec08 +msg = """ +ir = 1: (%1, %2) ## rrule(foo, x) + %3 = Main.f(%2) ## a = f(x) + %4 = Main.g(%3) ## b = g(a) + %5 = Main.h(%4) ## y = h(b) + return %5 ## return y +""" +``` +Variable names are replaced by `%N` and each function gets is own line. +We can evalulate the IR (to actually run it) like this +```@example lec08 +evalir(ir, nothing, 2.) +``` +As a first step, lets transform the function calls to `rrule` calls. For +this, all we need to do is iterate through the IR line by line and replace each +statement with `(Main.rrule)(Main.func, %N)`, where `Main` just stand for the +gobal main module in which we just defined our functions. +But remember that the `rrule` returns +the value `v` *and* the pullback `J` to compute the gradient. Just +replacing the statements would alter our forward pass. Instead we can insert +each statement *before* the one we want to change. Then we can replace the the +original statement with `v = rr[1]` to use only `v` and not `J` in the +subsequent computation. +```@example lec08 +using IRTools +using IRTools: xcall, stmt + +xgetindex(x, i...) = xcall(Base, :getindex, x, i...) + +ir = @code_ir foo(2.) +pr = IRTools.Pipe(ir) + +for (v,statement) in pr + ex = statement.expr + rr = xcall(rrule, ex.args...) 
+ # pr[v] = stmt(rr, line=ir[v].line) + vJ = insert!(pr, v, stmt(rr, line = ir[v].line)) + pr[v] = xgetindex(vJ,1) +end +ir = IRTools.finish(pr) +# +#msg = """ +#ir = 1: (%1, %2) ## rrule(foo, x) +# %3 = (Main.rrule)(Main.f, %2) ## ra = rrule(f,x) +# %4 = Base.getindex(%3, 1) ## a = ra[1] +# %5 = (Main.rrule)(Main.g, %4) ## rb = rrule(g,a) +# %6 = Base.getindex(%5, 1) ## b = rb[1] +# %7 = (Main.rrule)(Main.h, %6) ## ry = rrule(h,b) +# %8 = Base.getindex(%7, 1) ## y = ry[1] +# return %8 ## return y +#""" +#println(msg) +``` +Evaluation of this transformed IR should still give us the same value +```@example lec08 +evalir(ir, nothing, 2.) +``` + +The only thing that is left to do now is collect the `Js` and return +a tuple of our forward value and the `Js`. +```@example lec08 +using IRTools: insertafter!, substitute, xcall, stmt + +xtuple(xs...) = xcall(Core, :tuple, xs...) + +ir = @code_ir foo(2.) +pr = IRTools.Pipe(ir) +Js = IRTools.Variable[] + +for (v,statement) in pr + ex = statement.expr + rr = xcall(rrule, ex.args...) 
# ex.args = (f,x) + vJ = insert!(pr, v, stmt(rr, line = ir[v].line)) + pr[v] = xgetindex(vJ,1) + + # collect Js + J = insertafter!(pr, v, stmt(xgetindex(vJ,2), line=ir[v].line)) + push!(Js, substitute(pr, J)) +end +ir = IRTools.finish(pr) +# add the collected `Js` to `ir` +Js = push!(ir, xtuple(Js...)) +# return a tuple of the last `v` and `Js` +ret = ir.blocks[end].branches[end].args[1] +IRTools.return!(ir, xtuple(ret, Js)) +ir +#msg = """ +#ir = 1: (%1, %2) ## rrule(foo, x) +# %3 = (Main.rrule)(Main.f, %2) ## ra = rrule(f,x) +# %4 = Base.getindex(%3, 1) ## a = ra[1] +# %5 = Base.getindex(%3, 2) ## Ja = ra[2] +# %6 = (Main.rrule)(Main.g, %4) ## rb = rrule(g,a) +# %7 = Base.getindex(%6, 1) ## b = rb[1] +# %8 = Base.getindex(%6, 2) ## Jb = rb[2] +# %9 = (Main.rrule)(Main.h, %7) ## ry = rrule(h,b) +# %10 = Base.getindex(%9, 1) ## y = ry[1] +# %11 = Base.getindex(%9, 2) ## Jy = ry[2] +# %12 = Core.tuple(%5, %8, %11) ## Js = (Ja,Jb,Jy) +# %13 = Core.tuple(%10, %12) ## rr = (y, Js) +# return %13 ## return rr +#""" +#println(msg) +``` +The resulting IR can be evaluated to the forward pass value and the Jacobians: +```@repl lec08 +(y, Js) = evalir(ir, foo, 2.) +``` +To compute the derivative given the tuple of `Js` we just need to compose them +and set the initial gradient to one: +```@repl lec08 +reduce(|>, Js, init=1) # Ja(Jb(Jy(1))) +``` +The code for transforming the IR as described above looks like this. +```@example lec08 +function transform(ir, x) + pr = IRTools.Pipe(ir) + Js = IRTools.Variable[] + + # loop over each line in the IR + for (v,statement) in pr + ex = statement.expr + # insert the rrule + rr = xcall(rrule, ex.args...) 
# ex.args = (f,x) + vJ = insert!(pr, v, stmt(rr, line = ir[v].line)) + # replace original line with f(x) from rrule + pr[v] = xgetindex(vJ,1) + + # save jacobian in a variable + J = insertafter!(pr, v, stmt(xgetindex(vJ,2), line=ir[v].line)) + # add it to a list of jacobians + push!(Js, substitute(pr, J)) + end + ir = IRTools.finish(pr) + # add the collected `Js` to `ir` + Js = push!(ir, xtuple(Js...)) + # return a tuple of the foo(x) and `Js` + ret = ir.blocks[end].branches[end].args[1] + IRTools.return!(ir, xtuple(ret, Js)) + return ir +end + +xgetindex(x, i...) = xcall(Base, :getindex, x, i...) +xtuple(xs...) = xcall(Core, :tuple, xs...) +nothing # hide +``` +Now we can write a general `rrule` that can differentiate any function +composed of our defined `rrule`s +```@example lec08 +function rrule(f, x) + ir = @code_ir f(x) + ir_derived = transform(ir,x) + y, Js = evalir(ir_derived, nothing, x) + df(Δ) = reduce(|>, Js, init=Δ) + return y, df +end + + +reverse(f,x) = rrule(f,x)[2](one(x)) +nothing # hide +``` +Finally, we just have to use `reverse` to compute the gradient +```@example lec08 +plot(-2:0.1:2, foo, label="f(x) = 5sin(x^2)", lw=3) +plot!(-2:0.1:2, x->10x*cos(x^2), label="Analytic f'", ls=:dot, lw=3) +plot!(-2:0.1:2, x->reverse(foo,x), label="Dual Forward Mode f'", lw=3, ls=:dash) +``` + +--- +- Efficiency of the forward pass becomes essentially a compiler problem +- If we define specialized rules we will gain performance +--- + +# Performance Forward vs. Reverse + +This section compares the performance of three different, widely used Julia AD +systems `ForwardDiff.jl` (forward mode), `ReverseDiff.jl` (tracing-based +reverse mode), and `Zygote.jl` (source-to-source reverse mode), as well as JAX +forward/reverse modes. + +As a benchmark function we can compute the Jacobian of $f:\mathbb R^N +\rightarrow \mathbb R^M$ with respect to $\bm x$. +In the benchmark we test various different values of $N$ and $M$ to show the +differences between the backends. 
+```math +f(\bm x) = (\bm W \bm x + \bm b)^2 +``` + +```@setup lec08 +using DataFrames +using DrWatson +using Glob + + +julia_res = map(glob("julia-*.txt")) do fname + d = parse_savename(replace(fname, "julia-"=>""))[2] + @unpack N, M = d + lines = open(fname) |> eachline + map(lines) do line + s = split(line, ":") + backend = s[1] + time = parse(Float32, s[2]) / 10^6 + (backend, time, "$(N)x$(M)") + end +end + +jax_res = map(glob("jax-*.txt")) do fname + d = parse_savename(replace(fname, "jax-"=>""))[2] + @unpack N, M = d + lines = open(fname) |> eachline + map(lines) do line + s = split(line, ":") + backend = s[1] + time = parse(Float32, s[2]) * 10^3 + (backend, time, "$(N)x$(M)") + end +end + +res = vcat(julia_res, jax_res) + +df = DataFrame(reduce(vcat, res)) +df = unstack(df, 3, 1, 2) +ns = names(df) +ns[1] = "N x M" +rename!(df, ns) +df = DataFrame([[names(df)]; collect.(eachrow(df))], [:column; Symbol.(axes(df, 1))]) + +ns = df[1,:] |> values |> collect +rename!(df, ns) +``` +```@example lec08 +df[2:end,:] # hide +``` diff --git a/docs_vitepress/src/lectures/lecture_09/jax-N=10000_M=1.txt b/docs_vitepress/src/lectures/lecture_09/jax-N=10000_M=1.txt new file mode 100644 index 00000000..f84cf760 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/jax-N=10000_M=1.txt @@ -0,0 +1,2 @@ +JAX (forward): 0.05906954288482666 +JAX (reverse): 0.004635481834411621 \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_09/jax-N=1000_M=1.txt b/docs_vitepress/src/lectures/lecture_09/jax-N=1000_M=1.txt new file mode 100644 index 00000000..08cc0e23 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/jax-N=1000_M=1.txt @@ -0,0 +1,2 @@ +JAX (forward): 0.005011649131774903 +JAX (reverse): 0.003611617088317871 \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_09/jax-N=100_M=1.txt b/docs_vitepress/src/lectures/lecture_09/jax-N=100_M=1.txt new file mode 100644 index 00000000..a21c8604 --- /dev/null +++ 
b/docs_vitepress/src/lectures/lecture_09/jax-N=100_M=1.txt @@ -0,0 +1,2 @@ +JAX (forward): 0.00287567138671875 +JAX (reverse): 0.0031633067131042482 \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_09/jax-N=100_M=100.txt b/docs_vitepress/src/lectures/lecture_09/jax-N=100_M=100.txt new file mode 100644 index 00000000..12288a04 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/jax-N=100_M=100.txt @@ -0,0 +1,2 @@ +JAX (forward): 0.002484147548675537 +JAX (reverse): 0.0030963587760925292 \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_09/jax-N=100_M=1000.txt b/docs_vitepress/src/lectures/lecture_09/jax-N=100_M=1000.txt new file mode 100644 index 00000000..fadcf377 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/jax-N=100_M=1000.txt @@ -0,0 +1,2 @@ +JAX (forward): 0.004091489315032959 +JAX (reverse): 0.007975153923034668 \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_09/julia-N=10000_M=1.txt b/docs_vitepress/src/lectures/lecture_09/julia-N=10000_M=1.txt new file mode 100644 index 00000000..12c8dc07 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/julia-N=10000_M=1.txt @@ -0,0 +1,3 @@ +ForwardDiff.jl: 2.940003313e7 +ReverseDiff.jl: 271417.83 +Zygote.jl: 48022.63 diff --git a/docs_vitepress/src/lectures/lecture_09/julia-N=1000_M=1.txt b/docs_vitepress/src/lectures/lecture_09/julia-N=1000_M=1.txt new file mode 100644 index 00000000..336ef6b3 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/julia-N=1000_M=1.txt @@ -0,0 +1,3 @@ +ForwardDiff.jl: 381397.27 +ReverseDiff.jl: 68533.79 +Zygote.jl: 6498.564999999999 diff --git a/docs_vitepress/src/lectures/lecture_09/julia-N=100_M=1.txt b/docs_vitepress/src/lectures/lecture_09/julia-N=100_M=1.txt new file mode 100644 index 00000000..9faa5ab5 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/julia-N=100_M=1.txt @@ -0,0 +1,3 @@ +ForwardDiff.jl: 11961.82 +ReverseDiff.jl: 59821.06 +Zygote.jl: 
1546.8603333333335 diff --git a/docs_vitepress/src/lectures/lecture_09/julia-N=100_M=100.txt b/docs_vitepress/src/lectures/lecture_09/julia-N=100_M=100.txt new file mode 100644 index 00000000..dc384a3e --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/julia-N=100_M=100.txt @@ -0,0 +1,3 @@ +ForwardDiff.jl: 215920.0 +ReverseDiff.jl: 794220.7 +Zygote.jl: 20034.84 diff --git a/docs_vitepress/src/lectures/lecture_09/julia-N=100_M=1000.txt b/docs_vitepress/src/lectures/lecture_09/julia-N=100_M=1000.txt new file mode 100644 index 00000000..834dc99e --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/julia-N=100_M=1000.txt @@ -0,0 +1,3 @@ +ForwardDiff.jl: 2.92624655e6 +ReverseDiff.jl: 1.179761551e7 +Zygote.jl: 173701.21 diff --git a/docs_vitepress/src/lectures/lecture_09/lab.md b/docs_vitepress/src/lectures/lecture_09/lab.md new file mode 100644 index 00000000..adf0eb8b --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/lab.md @@ -0,0 +1,631 @@ +# Lab 09 - Generated Functions & IR + +In this lab you will practice two advanced meta programming techniques: + +* _**Generated functions**_ can help you write specialized code for certain + kinds of parametric types with more flexibility and/or less code. +* _**IRTools.jl**_ is a package that simplifies the manipulation of lowered and + typed Julia code + +```@setup lab09 +using BenchmarkTools +``` + +## `@generate`d Functions + +Remember the three most important things about generated functions: + +* They return *quoted expressions* (like macros). +* You have access to type information of your input variables. +* They have to be _**pure**_ + +### A faster `polynomial` + +Throughout this course we have come back to our `polynomial` function which +evaluates a polynomial based on the Horner schema. Below you can find a version +of the function that operates on a tuple of length $N$. 
+ +```@example lab09 +function polynomial(x, p::NTuple{N}) where N + acc = p[N] + for i in N-1:-1:1 + acc = x*acc + p[i] + end + acc +end +nothing # hide +``` + +Julia has its own implementation of this function called `evalpoly`. If we +compare the performance of our `polynomial` and Julia's `evalpoly` we can +observe a pretty big difference: + +```@repl lab09 +x = 2.0 +p = ntuple(float,20); + +@btime polynomial($x,$p) +@btime evalpoly($x,$p) +``` + +Julia's implementation uses a generated function which specializes on different +tuple lengths (i.e. it *unrolls* the loop) and eliminates the (small) overhead +of looping over the tuple. This is possible, because the length of the tuple is +known during compile time. You can check the difference between `polynomial` +and `evalpoly` yourself via the introspectionwtools you know - e.g. +`@code_lowered`. + + +::: warning Exercise + +Rewrite the `polynomial` function as a generated function with the signature + +```julia +genpoly(x::Number, p::NTuple{N}) where N +``` + +**Hints:** +* Remember that you have to generate a quoted expression inside your generated + function, so you will need things like `:($expr1 + $expr2)`. +* You can debug the expression you are generating by omitting the `@generated` + macro from your function. + +::: + +::: details Show solution + +```@example lab09 +@generated function genpoly(x, p::NTuple{N}) where N + ex = :(p[$N]) + for i in N-1:-1:1 + ex = :(x*$ex + p[$i]) + end + ex +end +nothing # hide +``` + +::: + +You should get the same performance as `evalpoly` (and as `@poly` from Lab 7 with +the added convenience of not having to spell out all the coefficients in your code +like: `p = @poly 1 2 3 ...`). + +```@repl lab09 +@btime genpoly($x,$p) +``` + + + +### Fast, Static Matrices + +Another great example that makes heavy use of generated functions are *static +arrays*. A static array is an array of fixed size which can be implemented via +an `NTuple`. 
This means that it will be allocated on the stack, which can buy
+us a lot of performance for smaller static arrays. We define a
+`StaticMatrix{T,R,C,L}` where the parametric types represent the matrix element
+type `T` (e.g. `Float32`), the number of rows `R`, the number of columns `C`,
+and the total length of the matrix `L=C*R` (which we need to set the size of
+the `NTuple`).
+
+```@example lab09
+struct StaticMatrix{T,R,C,L} <: AbstractArray{T,2}
+    data::NTuple{L,T}
+end
+
+function StaticMatrix(x::AbstractMatrix{T}) where T
+    (R,C) = size(x)
+    StaticMatrix{T,R,C,C*R}(x |> Tuple)
+end
+nothing # hide
+```
+
+::: warning Exercise
+
+As a warm-up, overload the `Base` functions `size`, `length`,
+`getindex(x::StaticMatrix,i::Int)`, and `getindex(x::StaticMatrix,r::Int,c::Int)`.
+
+:::
+
+::: details Show solution
+
+```@example lab09
+Base.size(x::StaticMatrix{T,R,C}) where {T,R,C} = (R,C)
+Base.length(x::StaticMatrix{T,R,C,L}) where {T,R,C,L} = L
+Base.getindex(x::StaticMatrix, i::Int) = x.data[i]
+Base.getindex(x::StaticMatrix{T,R,C}, r::Int, c::Int) where {T,R,C} = x.data[R*(c-1) + r]
+```
+
+:::
+
+You can check if everything works correctly by comparing to a normal `Matrix`:
+
+```@repl lab09
+x = rand(2,3)
+x[1,2]
+a = StaticMatrix(x)
+a[1,2]
+```
+
+::: warning Exercise
+
+Overload matrix multiplication between two static matrices
+
+```julia
+Base.:*(x::StaticMatrix{T,K,M},y::StaticMatrix{T,M,N})
+```
+
+with a generated function that creates an expression without loops. Below you
+can see an example for an expression that would be generated from multiplying
+two $2\times 2$ matrices.
+
+```julia
+:(StaticMatrix{T,2,2,4}((
+    (x[1,1]*y[1,1] + x[1,2]*y[2,1]),
+    (x[2,1]*y[1,1] + x[2,2]*y[2,1]),
+    (x[1,1]*y[1,2] + x[1,2]*y[2,2]),
+    (x[2,1]*y[1,2] + x[2,2]*y[2,2])
+)))
+```
+
+**Hints:**
+
+* You can get output like above by leaving out the `@generated` in front of your
+  overload.
+* It might be helpful to implement matrix multiplication in a *normal* Julia + function first. +* You can construct an expression for a sum of multiple elements like below. + +```@repl lab09 +Expr(:call,:+,1,2,3) +Expr(:call,:+,1,2,3) |> eval +``` + +::: + +::: details Show solution + +```@example lab09 +@generated function Base.:*(x::StaticMatrix{T,K,M}, y::StaticMatrix{T,M,N}) where {T,K,M,N} + zs = map(Iterators.product(1:K, 1:N) |> collect |> vec) do (k,n) + Expr(:call, :+, [:(x[$k,$m] * y[$m,$n]) for m=1:M]...) + end + z = Expr(:tuple, zs...) + :(StaticMatrix{$T,$K,$N,$(K*N)}($z)) +end +nothing # hide +``` + +::: + +You can check that your matrix multiplication works by multiplying two random +matrices. Which one is faster? + +```@repl lab09 +a = rand(2,3) +b = rand(3,4) +c = StaticMatrix(a) +d = StaticMatrix(b) +a*b +c*d +``` + +## `OptionalArgChecks.jl` + +The package [`OptionalArgChecks.jl`](https://github.com/simeonschaub/OptionalArgChecks.jl) +makes is possible to add checks to a function which can then be removed by +calling the function with the `@skip` macro. For example, we can check if the +input to a function `f` is an even number + +```@example lab09 +function f(x::Number) + iseven(x) || error("Input has to be an even number!") + x +end +nothing # hide +``` + +If you are doing more involved argument checking it can take quite some time to +perform all your checks. However, if you want to be fast and are completely +sure that you are always passing in the correct inputs to your function, you +might want to remove them in some cases. Hence, we would like to transform the +IR of the function above + +```@repl lab09 +using IRTools +using IRTools: @code_ir +@code_ir f(1) +``` + +To some thing like this + +```@repl lab09 +transformed_f(x::Number) = x +@code_ir transformed_f(1) +``` + +### Marking Argument Checks + +As a first step we will implement a macro that marks checks which we might want +to remove later by surrounding it with `:meta` expressions. 
This will make it +easy to detect which part of the code can be removed. A `:meta` expression can +be created like this + +```@repl lab09 +Expr(:meta, :mark_begin) +Expr(:meta, :mark_end) +``` + +and they will not be evaluated but remain in your IR. To surround an expression +with two meta expressions you can use a `:block` expression: + +```@repl lab09 +ex = :(x+x) +Expr(:block, :(print(x)), ex, :(print(x))) +``` + +::: warning Exercise + +Define a macro `@mark` that takes an expression and surrounds it with two +meta expressions marking the beginning and end of a check. + +**Hints** +* Defining a function `_mark(ex::Expr)` which manipulates your expressions can + help a lot with debugging your macro. + +::: + +::: details Show solution + +```@example lab09 +function _mark(ex::Expr) + return Expr( + :block, + Expr(:meta, :mark_begin), + esc(ex), + Expr(:meta, :mark_end), + ) +end + +macro mark(ex) + _mark(ex) +end +nothing # hide +``` + +::: + +If you have defined a `_mark` function you can test that it works like this + +```@repl lab09 +_mark(:(println(x))) +``` + +The complete macro should work like below + +```@repl lab09 +function f(x::Number) + @mark @show x + x +end; +@code_ir f(2) +f(2) +``` + +### Removing Argument Checks + +Now comes tricky part for which we need `IRTools.jl`. +We want to remove all lines that are between our two meta blocks. +You can delete the line that corresponds to a certain variable with the `delete!` +and the `var` functions. +E.g. deleting the line that defines variable `%4` works like this: + +```@repl lab09 +using IRTools: delete!, var + +ir = @code_ir f(2) +delete!(ir, var(4)) +``` + +::: warning Exercise + +Write a function `skip(ir::IR)` which deletes all lines between the meta +expression `:mark_begin` and `:mark_end`. 
+ +**Hints** +You can check whether a statement is one of our meta expressions like this: + +```@repl lab09 +ismarkbegin(e::Expr) = Meta.isexpr(e,:meta) && e.args[1]===:mark_begin +ismarkbegin(Expr(:meta,:mark_begin)) +``` + +::: + +::: details Show solution + +```@example lab09 +ismarkend(e::Expr) = Meta.isexpr(e,:meta) && e.args[1]===:mark_end + +function skip(ir) + delete_line = false + for (x,st) in ir + isbegin = ismarkbegin(st.expr) + isend = ismarkend(st.expr) + + if isbegin + delete_line = true + end + + if delete_line + delete!(ir,x) + end + + if isend + delete_line = false + end + end + ir +end +nothing # hide +``` + +::: + +Your function should transform the IR of `f` like below. + +```@repl lab09 +ir = @code_ir f(2) +ir = skip(ir) + +using IRTools: func +func(ir)(nothing, 2) # no output from @show! +``` + +However, if we have a slightly more complicated IR like below this version of +our function will fail. It actually fails so badly that running +`func(ir)(nothing,2)` after `skip` will cause the build of this page to crash, +so we cannot show you the output here ;). + +```@repl lab09 +function g(x) + @mark iseven(x) && println("even") + x +end + +ir = @code_ir g(2) +ir = skip(ir) +``` + +The crash is due to `%4` not existing anymore. We can fix this by emptying the +block in which we found the `:mark_begin` expression and branching to the +block that contains `:mark_end` (unless they are in the same block already). +If some (branching) code in between remained, it should then be removed by the +compiler because it is never reached. + +::: warning Exercise + +Use the functions `IRTools.block`, `IRTools.branches`, `IRTools.empty!`, and +`IRTools.branch!` to modify `skip` such that it also empties the `:mark_begin` +block, and adds a branch to the `:mark_end` block (unless they are the same +block). + +**Hints** +* `block` gets you the block of IR in which a given variable is if you call e.g. `block(ir,var(4))`. 
+* `empty!` removes all statements in a block. +* `branches` returns all branches of a block. +* `branch!(a,b)` creates a branch from the end of block `a` to the beginning + block `b` + +::: + +::: details Show solution + +```@example lab09 +using IRTools: block, branch!, empty!, branches +function skip(ir) + delete_line = false + orig = nothing + for (x,st) in ir + isbegin = ismarkbegin(st.expr) + isend = ismarkend(st.expr) + + if isbegin + delete_line = true + end + + # this part is new + if isbegin + orig = block(ir,x) + elseif isend + dest = block(ir,x) + if orig != dest + empty!(branches(orig)) + branch!(orig,dest) + end + end + + if delete_line + delete!(ir,x) + end + + if isend + delete_line = false + end + end + ir +end +nothing # hide +``` + +::: + +The result should construct valid IR for our `g` function. + +```@repl lab09 +g(2) +ir = @code_ir g(2) +ir = skip(ir) +func(ir)(nothing,2) +``` + +And it should not break when applying it to `f`. + +```@repl lab09 +f(2) +ir = @code_ir f(2) +ir = skip(ir) +func(ir)(nothing,2) +``` + +### Recursively Removing Argument Checks + +The last step to finalize the `skip` function is to make it work recursively. +In the current version we can handle functions that contain `@mark` statements, +but we are not going any deeper than that. Nested functions will not be touched: + +```@example lab09 +foo(x) = bar(baz(x)) + +function bar(x) + @mark iseven(x) && println("The input is even.") + x +end + +function baz(x) + @mark x<0 && println("The input is negative.") + x +end + +nothing # hide +``` + +```@repl lab09 +ir = @code_ir foo(-2) +ir = skip(ir) +func(ir)(nothing,-2) +``` + +For recursion we will use the macro `IRTools.@dynamo` which will make recursion +of our `skip` function a lot easier. Additionally, it will save us from all the +`func(ir)(nothing, args...)` statements. To use `@dynamo` we have to slightly +modify how we call `skip`: + +```julia +@dynamo function skip(args...) + ir = IR(args...) 
+
+
+    # same code as before that modifies `ir`
+    # ...
+
+    return ir
+end
+
+# now we can call `skip` like this
+skip(f,2)
+```
+
+Now we can easily use `skip` in recursion, because we can just pass the
+arguments of an expression like this:
+
+```julia
+using IRTools: xcall
+
+for (x,st) in ir
+    isexpr(st.expr,:call) || continue
+    ir[x] = xcall(skip, st.expr.args...)
+end
+```
+
+The function `xcall` will create an expression that calls `skip` with the given
+arguments and returns `Expr(:call, skip, args...)`. Note that you can modify
+expressions of a given variable in the IR via `setindex!`.
+
+::: warning Exercise
+
+Modify `skip` such that it uses `@dynamo` and apply it recursively to all
+`:call` expressions that you encounter while looping over the given IR.
+This will dive all the way down to `Core.Builtin`s and `Core.IntrinsicFunction`s
+which you cannot manipulate anymore (because they are written in C).
+You have to end the recursion at these places which can be done via multiple
+dispatch of `skip` on `Builtin`s and `IntrinsicFunction`s.
+
+Once you are done with this you can also define a macro such that you can
+conveniently call `@skip` with an expression:
+
+```julia
+skip(f,2)
+@skip f(2)
+```
+
+:::
+
+::: details Show solution
+
+```@example lab09
+using IRTools: @dynamo, xcall, IR
+
+# this is where we want to stop recursion
+skip(f::Core.IntrinsicFunction, args...) = f(args...)
+skip(f::Core.Builtin, args...) = f(args...)
+
+@dynamo function skip(args...)
+    ir = IR(args...)
+ delete_line = false + orig = nothing + for (x,st) in ir + isbegin = ismarkbegin(st.expr) + isend = ismarkend(st.expr) + + if isbegin + delete_line = true + end + + if isbegin + orig = block(ir,x) + elseif isend + dest = block(ir,x) + if orig != dest + empty!(branches(orig)) + branch!(orig,dest) + end + end + + if delete_line + delete!(ir,x) + end + + if isend + delete_line = false + end + + # this part is new + if haskey(ir,x) && Meta.isexpr(st.expr,:call) + ir[x] = xcall(skip, st.expr.args...) + end + end + return ir +end + +macro skip(ex) + ex.head == :call || error("Input expression has to be a `:call`.") + return xcall(skip, ex.args...) +end +nothing # hide +``` + +::: + +```@repl lab09 +@code_ir foo(2) +@code_ir skip(foo,2) +foo(-2) +skip(foo,-2) +@skip foo(-2) +``` + +## References + +* [Static matrices](https://wesselb.github.io/2020/12/13/julia-learning-circle-meeting-3.html) with `@generate`d functions blog post +* [`OptionalArgChecks.jl`](https://github.com/simeonschaub/OptionalArgChecks.jl) +* IRTools [Dynamo](https://fluxml.ai/IRTools.jl/latest/dynamo/) diff --git a/docs_vitepress/src/lectures/lecture_09/lecture.md b/docs_vitepress/src/lectures/lecture_09/lecture.md new file mode 100644 index 00000000..018198ad --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/lecture.md @@ -0,0 +1,1003 @@ +# Manipulating Intermediate Represenation (IR) + +```@setup lec09 +using InteractiveUtils: @code_typed, @code_lowered, code_lowered +``` + +## Generated functions + +Sometimes it is convenient to generate function once types of arguments are known. For example if we have function `foo(args...)`, we can generate different body for different length of `Tuple` and types in `args`. Do we really need such thing, or it is just wish of curious programmer? 
Not really, as
+
+- we can deal with variability of `args` using normal control-flow logic `if length(args) == 1 elseif ...`
+- we can (automatically) generate (a possibly very large) set of functions `foo` specialized for each length of `args` (or combination of types of `args`) and let multiple dispatch deal with this
+- we cannot deal with this situation with macros, because macros do not see types, only parsed AST, which is in this case always the same.
+
+Generated functions allow us to specialize the code for a given type of arguments. They are like macros in the sense that they **return expressions** and not **results**. But unlike macros, the input is not an expression or the value of the arguments, but their types (the arguments are of type `Type`). They are also called whenever the compiler needs them (which means at least once for each combination of arguments, but possibly more times due to code invalidation).
+
+Let's look at an example
+
+```@example lec09
+@generated function genplus(x, y)
+    println("generating genplus(x, y)")
+    @show (x, y, typeof(x), typeof(y))
+    quote
+        println("executing generated genplus(x, y)")
+        @show (x, y, typeof(x), typeof(y))
+        x + y
+    end
+end
+nothing # hide
+```
+
+and observe the output
+
+```julia
+julia> genplus(1.0, 1.0) == 1.0 + 1.0
+generating genplus(x, y)
+(x, y, typeof(x), typeof(y)) = (Float64, Float64, DataType, DataType)
+executing generated genplus(x, y)
+(x, y, typeof(x), typeof(y)) = (1.0, 1.0, Float64, Float64)
+true
+
+julia> genplus(1.0, 1.0) == 1.0 + 1.0
+executing generated genplus(x, y)
+(x, y, typeof(x), typeof(y)) = (1.0, 1.0, Float64, Float64)
+true
+
+julia> genplus(1, 1) == 1 + 1
+generating genplus(x, y)
+(x, y, typeof(x), typeof(y)) = (Int64, Int64, DataType, DataType)
+executing generated genplus(x, y)
+(x, y, typeof(x), typeof(y)) = (1, 1, Int64, Int64)
+true
+```
+
+which shows that the body of `genplus` is called for each combination of types of parameters, but the generated code is called whenever `genplus` is
called.
+
+Generated functions have to be pure in the sense that they are not allowed to have side effects, for example modifying some global variables. Note that printing is not allowed in pure functions, as it modifies the global buffer. From the above example this rule does not seem to be enforced, but not obeying it can lead to unexpected errors mostly caused by not knowing when and how many times the functions will be called.
+
+Finally, generated functions cannot call functions that have been defined after their definition.
+
+```@repl lec09
+@generated function genplus(x, y)
+    foo()
+    :(x + y)
+end
+
+foo() = println("foo")
+genplus(1,1)
+```
+
+Here, the *applicable method* is `foo`.
+
+### An example that explains everything.
+
+Consider a version of `map` applicable to `NamedTuple`s with permuted names.
+Recall the behavior of normal map, which works if the names are in the same order.
+
+```@repl lec09
+x = (a = 1, b = 2, c = 3)
+y = (a = 4, b = 5, c = 6)
+map(+, x, y)
+```
+
+The same does not work with permuted names:
+
+```@repl lec09
+x = (a = 1, b = 2, c = 3)
+y = (c = 6, b = 5, a = 4)
+map(+, x, y)
+```
+
+How to fix this? The usual approach would be to iterate over the keys in named tuples:
+
+```@example lec09
+function permuted_map(f, x::NamedTuple{KX}, y::NamedTuple{KY}) where {KX, KY}
+    ks = tuple(intersect(KX,KY)...)
+    NamedTuple{ks}(map(k -> f(x[k], y[k]), ks))
+end
+nothing # hide
+```
+
+But, can we do better? Recall that in `NamedTuple`s, we exactly know the position of the arguments, hence we should be able to directly match the corresponding arguments without using `get`.
+
+Since creation (and debugging) of generated functions is difficult, we start with a single-argument unrolled map.
+ +```@repl +@generated function unrolled_map(f, x::NamedTuple{KX}) where {KX} + vals = [:(f(getfield(x, $(QuoteNode(k))))) for k in KX] + :(($(vals...),)) +end +unrolled_map(e->e+1, x) +``` + +We see that inserting a `Symbol` specifying the field in the `NamedTuple` is a +bit tricky. It needs to be quoted, since `$()` which is needed to substitute +`k` for its value "peels" one layer of the quoting. Compare this to + +```@repl +vals = [:(f(getfield(x, $(k)))) for k in KX] +``` + +Since getting the field is awkward, we write syntactic sugar for that + +```julia +_get(name, k) = :(getfield($(name), $(QuoteNode(k)))) +``` + +with that, we proceed to a nicer two argument function which we have desired: + +```@repl lec09 +@generated function unrolled_map(f, x::NamedTuple{KX}, y::NamedTuple{KY}) where {KX, KY} + ks = tuple(intersect(KX,KY)...) + _get(name, k) = :(getfield($(name), $(QuoteNode(k)))) + vals = [:(f($(_get(:x, k)), $(_get(:y, k)))) for k in ks] + :(NamedTuple{$(ks)}(($(vals...),))) +end +nothing # hide +``` + +We can check that the `unrolled_map` unrolls the map and generates just needed operations + +```@repl lec09 +@code_typed unrolled_map(+, x, y) +``` + +and compare this to the code generated by the non-generated version `permuted_map`: + +```julia +@code_typed permuted_map(+, x, y) +``` + +which is not shown here for the sake of conciseness. + +For fun, we can create a version which replaces the `Symbol` arguments directly by position numbers + +```julia +@generated function unrolled_map(f, x::NamedTuple{KX}, y::NamedTuple{KY}) where {KX, KY} + ks = tuple(intersect(KX,KY)...) 
+ _get(name, k, KS) = :(getfield($(name), $(findfirst(k .== KS)))) + vals = [:(f($(_get(:x, k, KX)), $(_get(:y, k, KY)))) for k in KX] + :(NamedTuple{$(KX)}(($(vals...),))) +end +``` + +## Optionally generated functions + +Macro `@generated` is expanded to + +```julia +julia> @macroexpand @generated function gentest(x) + return :(x + x) + end + +:(function gentest(x) + if $(Expr(:generated)) + return $(Expr(:copyast, :($(QuoteNode(:(x + x)))))) + else + $(Expr(:meta, :generated_only)) + return + end + end) +``` + +which is a function with an if-condition, where the first branch `$(Expr(:generated))` generates the expression `:(x + x)` and returns it. The other spits out an error saying that the function has only a generated version. This suggests the possibility (and reality) that one can implement two versions of the same function; A generated and a *normal* version. It is left up to the compiler to decide which one to use. It is entirely up to the author to ensure that both versions are the same. Which version will the compiler take? The last comment on [23168](https://github.com/JuliaLang/julia/pull/23168) (as of time of writing) states: + +> *Currently the `@generated` branch is always used. In the future, which branch is used will mostly depend on whether the JIT compiler is enabled and available, and if it's not available, then it will depend on how much we were able to compile before the compiler was taken away. So I think it will mostly be a concern for those that might need static compilation and JIT-less deployment.* + +## Contextual dispatch / overdubbing + +Imagine that under some circumstances (context), you would like to use alternative implementations of some functions. One of the most cited motivations for this is automatic differentiation, where you would like to take the code **as-is** and calculate gradients with respect to some variables. 
Other use cases of this approach are mentioned in `Cassette.jl`:
+
+> *Downstream applications for Cassette include dynamic code analysis (e.g. profiling, record and replay style debugging, etc.), JIT compilation to new hardware/software backends, automatic differentiation, interval constraint programming, automatic parallelization/rescheduling, automatic memoization, lightweight multistage programming, graph extraction, and more.*
+
+In theory, we can do all the above by directly modifying the code or introducing new types, but that may require a lot of coding and changing of foreign libraries.
+
+The technique we desire is called contextual dispatch, which means that under some context, we invoke a different function. The library `Cassette.jl` provides a high-level API for overdubbing, but it is interesting to see how it works, as it shows how we can "interact" with the lowered code before the code is typed.
+
+### Insertion of code
+
+Imagine that Julia has compiled some function. For example
+
+```julia
+foo(x,y) = x * y + sin(x)
+```
+
+and observe its lowered SSA format
+
+```julia
+julia> @code_lowered foo(1.0, 1.0)
+CodeInfo(
+1 ─ %1 = x * y
+│   %2 = Main.sin(x)
+│   %3 = %1 + %2
+└── return %3
+)
+```
+
+The lowered form is convenient, because on the left hand, there is **always** one variable and the right-hand side is simplified to have (mostly) a single call / expression. Moreover, in the lowered form, all control flow operations like `if`, `for`, `while` and exceptions are converted to `Goto` and `GotoIfNot`, which simplifies their handling.
+
+### Codeinfo
+
+We can access the lowered form by
+
+```julia
+ci = @code_lowered foo(1.0, 1.0)
+```
+
+which returns an object of type `CodeInfo` containing many fields [docs](https://docs.julialang.org/en/v1/devdocs/ast/#Lowered-form).
To make the investigation slightly more interesting, we modify the function a bit to have local variables: + +```@repl lec09 +function foo(x,y) + z = x * y + z + sin(x) +end + +ci = @code_lowered foo(1.0, 1.0) +``` + +The most important (and interesting) field is `code`: + +```@repl lec09 +ci.code +``` + +It contains expressions corresponding to each line of the lowered form. You are free to access them (and modify them with care). Variables identified with underscore `Int`, for example `_2`, are slotted variables which are variables which have a name in the code, defined via input arguments or through an explicit assignment `:(=)`. The names of slotted variables are stored in `ci.slotnames` and they are of type + +```@repl lec09 +typeof(ci.code[1].args[2].args[2]) +ci.slotnames[ci.code[1].args[2].args[2].id] +ci.slotnames[ci.code[1].args[2].args[3].id] +ci.slotnames[ci.code[1].args[1].id] +``` + +The remaining variables are identified by an integer with prefix `%`, where the number corresponds to the line (index in `ci.code`), in which the variable was created. For example the fourth line `:(%2 + %3)` adds the results of the second line `:(_4)` containing variable `z` and the third line `:(Main.sin(_2))`. The type of each slot variable is stored in `slottypes`, which provides some information about how the variable is used ([see docs](https://docs.julialang.org/en/v1/devdocs/ast/#CodeInfo)). Note that if you modify / introduce slot variables, the length of `slotnames` and `slottypes` has to match and it has to be equal to the maximum number of slotted variables. + +`CodeInfo` also contains information about the source code. Each item of `ci.code` has an identifier in `ci.codelocs` which is an index into `ci.linetable` containing `Core.LineInfoNode` identifying lines in the source code (or in the REPL). Notice that `ci.linetable` is generally shorter then `ci.codelocs`, as one line of source code can be translated to multiple lines in lowered code. 
+ +The important feature of the lowered form is that we can freely edit (create new) `CodeInfo` and that generated functions can return a `CodeInfo` object instead of the AST. However, you need to **explicitly** write a `return` statement ([see issue 25678](https://github.com/JuliaLang/julia/issues/25678)). + +### Strategy for overdubbing + +In overdubbing, our intention is to recursively dive into called function definitions and modify / change their code. In our example below, with which we will demonstrate the manual implementation (for educational purposes), our goal is to enclose each function call with statements that log the exection time. This means we would like to implement a simplified recording profiler. This functionality cannot be implemented by a macros, since macros do not allow us to dive into function definitions. For example, in our function `foo`, we would would not be able to dive into the definition of `sin` (not that this is a terribly good idea, but the point should be clear). + +The overdubbing pattern works as follows. + +1. We define a `@generated function overdub(f, args...)` which takes as a first argument a function `f` and then its arguments. +2. In the function `overdub` we retrieve the `CodeInfo` for `f(args...)`, which is possible as we know types of the arguments at this time. +3. We modify the the `CodeInfo` of `f(args...)` according to our liking. Importantly, we replace all function calls `some_fun(some_args...)` with `overdub(some_fun, some_args...)` which establishes the recursive pattern. +4. Modify the arguments of the `CodeInfo` of `f(args...)` to match `overdub(f, args..)`. +5. Return the modified `CodeInfo`. + +#### The profiler + +The implementation of the simplified logging profiler is straightforward and looks as follows. 
+ +```julia +module LoggingProfiler + +struct Calls + stamps::Vector{Float64} # contains the time stamps + event::Vector{Symbol} # name of the function that is being recorded + startstop::Vector{Symbol} # if the time stamp corresponds to start or to stop + i::Ref{Int} +end + +function Calls(n::Int) + Calls(Vector{Float64}(undef, n+1), Vector{Symbol}(undef, n+1), Vector{Symbol}(undef, n+1), Ref{Int}(0)) +end + +function Base.show(io::IO, calls::Calls) + offset = 0 + if calls.i[] >= length(calls.stamps) + @warn "The recording buffer was too small, consider increasing it" + end + for i in 1:min(calls.i[], length(calls.stamps)) + offset -= calls.startstop[i] == :stop + foreach(_ -> print(io, " "), 1:max(offset, 0)) + rel_time = calls.stamps[i] - calls.stamps[1] + println(io, calls.event[i], ": ", rel_time) + offset += calls.startstop[i] == :start + end +end + +global const to = Calls(100) + +""" + record_start(ev::Symbol) + + record the start of the event, the time stamp is recorded after all counters are + appropriately increased +""" +record_start(ev::Symbol) = record_start(to, ev) +function record_start(calls, ev::Symbol) + n = calls.i[] = calls.i[] + 1 + n > length(calls.stamps) && return + calls.event[n] = ev + calls.startstop[n] = :start + calls.stamps[n] = time_ns() +end + +""" + record_end(ev::Symbol) + + record the end of the event, the time stamp is recorded before all counters are + appropriately increased +""" + +record_end(ev::Symbol) = record_end(to, ev::Symbol) + +function record_end(calls, ev::Symbol) + t = time_ns() + n = calls.i[] = calls.i[] + 1 + n > length(calls.stamps) && return + calls.event[n] = ev + calls.startstop[n] = :stop + calls.stamps[n] = t +end + +reset!() = to.i[] = 0 + +function Base.resize!(calls::Calls, n::Integer) + resize!(calls.stamps, n) + resize!(calls.event, n) + resize!(calls.startstop, n) +end + +exportname(ex::GlobalRef) = QuoteNode(ex.name) +exportname(ex::Symbol) = QuoteNode(ex) +exportname(ex::Expr) = 
exportname(ex.args[1]) +exportname(i::Int) = QuoteNode(Symbol("Int(",i,")")) + +function overdubbable(ex::Expr) + ex.head != :call && return(false) + a = ex.args[1] + a != GlobalRef && return(true) + a.mod != Core +end +overdubbable(ex) = false + + +function timable(ex::Expr) + ex.head != :call && return(false) + length(ex.args) < 2 && return(false) + ex.args[1] isa Core.GlobalRef && return(true) + ex.args[1] isa Symbol && return(true) + return(false) +end +timable(ex) = false + +export timable, exportname, overdubbable +end +``` + +The important functions are `report_start` and `report_end` which mark the beggining and end of the executed function. They differ mainly when time is recorded (on the end or on the start of the function call). The profiler has a fixed capacity to prevent garbage collection, which might be increased. + +Let's now describe the individual parts of `overdub` before presenting it in its entirety. +At first, we retrieve the codeinfo `ci` of the overdubbed function. For now, we will just assume we obtain it for example by + +```julia +ci = @code_lowered foo(1.0, 1.0) +``` + +we initialize the new `CodeInfo` object by emptying some dummy function as + +```@example lec09 +dummy() = return +new_ci = code_lowered(dummy, Tuple{})[1] +empty!(new_ci.code) +empty!(new_ci.slotnames) +empty!(new_ci.linetable) +empty!(new_ci.codelocs) +new_ci +``` + +Then, we need to copy the slot variables from the `ci` codeinfo of `foo` to the new codeinfo. Additionally, we have to add the arguments of `overdub(f, args...)` since the compiler sees `overdub(f, args...)` and not `foo(x,y)`: + +```@repl lec09 +new_ci.slotnames = vcat([Symbol("#self#"), :f, :args], ci.slotnames[2:end]) +new_ci.slotflags = vcat([0x00, 0x00, 0x00], ci.slotflags[2:end]) +``` + +Above, we also filled the `slotflags`. 
Authors admit that names `:f` and `:args` in the above should be replaced by a `gensym`ed name, but they do not anticipate this code to be used outside of this educative example where name-clashes might occur. +We also copy information about the lines from the source code: + +```@repl lec09 +foreach(s -> push!(new_ci.linetable, s), ci.linetable) +``` + +The most difficult part when rewriting `CodeInfo` objects is working with indexes, as the line numbers and left hand side variables are strictly ordered one by one and we need to properly change the indexes to reflect changes we made. We will therefore keep three lists + +```@example lec09 +maps = ( + ssa = Dict{Int, Int}(), + slots = Dict{Int, Any}(), + goto = Dict{Int,Int}(), +) +nothing # hide +``` + +where + +- `slots` maps slot variables in `ci` to those in `new_ci` +- `ssa` maps indexes of left-hand side assignments in `ci` to `new_ci` +- `goto` maps lines to which `GotoNode` and `GotoIfNot` point to variables in `ci` to `new_ci` (in our profiler example, we need to ensure to jump on the beggining of logging of executions) + +Mapping of slots can be initialized in advance, as it is a static shift by `2` : + +```julia +maps.slots[1] = Core.SlotNumber(1) +foreach(i -> maps.slots[i] = Core.SlotNumber(i + 2), 2:length(ci.slotnames)) +``` + +and we can check the correctness by + +```julia +@assert all(ci.slotnames[i] == new_ci.slotnames[maps.slots[i].id] for i in 1:length(ci.slotnames)) #test that +``` + +Equipped with that, we start rewriting the code of `foo(x, y)`. We start by a small preample, where we assign values of `args...` to `x`, and `y`. For the sake of simplicity, we map the slotnames to either `Core.SlotNumber` or to `Core.SSAValues` which simplifies the rewriting logic a bit. 
+ +```julia +newci_no = 0 +args = (Float64, Float64) +for i in 1:length(args) + newci_no +=1 + push!(new_ci.code, Expr(:call, Base.getindex, Core.SlotNumber(3), i)) + maps.slots[i+1] = Core.SSAValue(newci_no) + push!(new_ci.codelocs, ci.codelocs[1]) +end +``` + +Now we come to the pinnacle of rewriting the body of `foo(x,y)` while inserting calls to the profiler: + +```julia +for (ci_no, ex) in enumerate(ci.code) + if timable(ex) + fname = exportname(ex) + push!(new_ci.code, Expr(:call, GlobalRef(LoggingProfiler, :record_start), fname)) + push!(new_ci.codelocs, ci.codelocs[ci_no]) + newci_no += 1 + maps.goto[ci_no] = newci_no + ex = overdubbable(ex) ? Expr(:call, GlobalRef(Main, :overdub), ex.args...) : ex + push!(new_ci.code, ex) + push!(new_ci.codelocs, ci.codelocs[ci_no]) + newci_no += 1 + maps.ssa[ci_no] = newci_no + push!(new_ci.code, Expr(:call, GlobalRef(LoggingProfiler, :record_end), fname)) + push!(new_ci.codelocs, ci.codelocs[ci_no]) + newci_no += 1 + else + push!(new_ci.code, ex) + push!(new_ci.codelocs, ci.codelocs[ci_no]) + newci_no += 1 + maps.ssa[ci_no] = newci_no + end +end +``` + +which yields + +```julia +julia> new_ci.code +15-element Vector{Any}: + :((getindex)(_3, 1)) + :((getindex)(_3, 2)) + :(_4 = _2 * _3) + :(_4) + :(Main.LoggingProfiler.record_start(:sin)) + :(Main.overdub(Main.sin, _2)) + :(Main.LoggingProfiler.record_end(:sin)) + :(Main.LoggingProfiler.record_start(:+)) + :(Main.overdub(Main.:+, %2, %3)) + :(Main.LoggingProfiler.record_end(:+)) + :(return %4) +``` + +The important parts are: + +- Depending on the type of expressions (controlled by `timable`) we decide, if a function's execution time should be recorded. +- `fname = exportname(ex)` obtains the name of the profiled function call. +- `push!(new_ci.code, Expr(:call, GlobalRef(LoggingProfiler, :record_start), fname))` records the start of the exection. +- `maps.goto[ci_ssa_no] = ssa_no` updates the map from the code line number in `ci` to the one in `new_ci`. 
+- `maps.ssa[ci_ssa_no] = ssa_no` updates the map from the SSA line number in `ci` to `new_ci`. +- `ex = overdubbable(ex) ? Expr(:call, GlobalRef(Main, :overdub), ex.args...) : ex` modifies the function call (expression in general) to recurse the overdubbing. +Finally, we need to change the names of slot variables (`Core.SlotNumber`) and variables indexed by the SSA (`Core.SSAValue`). + +```julia +for i in length(args)+1:length(new_ci.code) + new_ci.code[i] = remap(new_ci.code[i], maps) +end +``` + +where `remap` is defined by the following block of code + +```julia +remap(ex::Expr, maps) = Expr(ex.head, remap(ex.args, maps)...) +remap(args::AbstractArray, maps) = map(a -> remap(a, maps), args) +remap(c::Core.GotoNode, maps) = Core.GotoNode(maps.goto[c.label]) +remap(c::Core.GotoIfNot, maps) = Core.GotoIfNot(remap(c.cond, maps), maps.goto[c.dest]) +remap(r::Core.ReturnNode, maps) = Core.ReturnNode(remap(r.val, maps)) +remap(a::Core.SlotNumber, maps) = maps.slots[a.id] +remap(a::Core.SSAValue, maps) = Core.SSAValue(maps.ssa[a.id]) +remap(a::Core.NewvarNode, maps) = Core.NewvarNode(maps.slots[a.slot.id]) +remap(a::GlobalRef, maps) = a +remap(a::QuoteNode, maps) = a +remap(ex, maps) = ex +``` + +::: danger Retrieving the code properly + +Consider the following function: +```julia +function test(x::T) where T<:Union{Float64, Float32} + x < T(pi) +end + +julia> ci = @code_lowered test(1.0) +CodeInfo( +1 ─ %1 = ($(Expr(:static_parameter, 1)))(Main.pi) +│ %2 = x < %1 +└── return %2 +) +``` +the `Expr(:static_parameter, 1)` in the first line of code obtains the type parameter `T` of the function `test`. Since this information is not accessible in the `CodeInfo`, it might render our tooling useless. The needed hook is `Base.Meta.partially_inline!` which partially inlines this into the `CodeInfo` object. 
+The code to retrieve the `CodeInfo` adapted from `IRTools` is a little involved: + +```julia +function retrieve_code_info(sigtypes, world = Base.get_world_counter()) + S = Tuple{map(s -> Core.Compiler.has_free_typevars(s) ? typeof(s.parameters[1]) : s, sigtypes)...} + _methods = Base._methods_by_ftype(S, -1, world) + if isempty(_methods) + @info("method $(sigtypes) does not exist") + return(nothing) + end + type_signature, raw_static_params, method = _methods[1] + mi = Core.Compiler.specialize_method(method, type_signature, raw_static_params, false) + ci = Base.isgenerated(mi) ? Core.Compiler.get_staged(mi) : Base.uncompressed_ast(method) + Base.Meta.partially_inline!(ci.code, [], method.sig, Any[raw_static_params...], 0, 0, :propagate) + ci +end +``` +but +```julia +julia> ci = retrieve_code_info((typeof(test), Float64)) +CodeInfo( + @ REPL[5]:2 within `test' +1 ─ %1 = ($(QuoteNode(Float64)))(Main.pi) +│ %2 = x < %1 +└── return %2 +) +``` +it performs the needed inlining of `Float64`. + +::: + +## Implementing the profiler with IRTools + +The above implementation of the profiler has shown, that rewriting IR manually is doable, but requires a lot of careful book-keeping. `IRTools.jl` makes our life much simpler, as they take away all the needed book-keeping and let us focus on what is important. + +```@repl lec09 +using IRTools +function foo(x, y) + z = x * y + z + sin(y) +end; +ir = @code_ir foo(1.0, 1.0) +``` + +We can see that at first sight, the representation of the lowered code in IRTools is similar to that of `CodeInfo`. Some notable differences: + +- `SlotNumber` are converted to `SSAValues` +- SSA form is divided into blocks by `GotoNode` and `GotoIfNot` in the parsed `CodeInfo` +- SSAValues do not need to be ordered. The reordering is deffered to the moment when one converts `IRTools.Inner.IR` back to the `CodeInfo`. 
+ +Let's now use the IRTools to insert the timing statements into the code for `foo`: + +```julia +using IRTools: xcall, insert!, insertafter! + +ir = @code_ir foo(1.0, 1.0) +for (v, ex) in ir + if timable(ex.expr) + fname = exportname(ex.expr) + insert!(ir, v, xcall(LoggingProfiler, :record_start, fname)) + insertafter!(ir, v, xcall(LoggingProfiler, :record_end, fname)) + end +end + +julia> ir +1: (%1, %2, %3) + %7 = Main.LoggingProfiler.record_start(:*) + %4 = %2 * %3 + %8 = Main.LoggingProfiler.record_end(:*) + %9 = Main.LoggingProfiler.record_start(:sin) + %5 = Main.sin(%3) + %10 = Main.LoggingProfiler.record_end(:sin) + %11 = Main.LoggingProfiler.record_start(:+) + %6 = %4 + %5 + %12 = Main.LoggingProfiler.record_end(:+) + return %6 +``` + +Observe that the statements are on the right places but they are not ordered. +We can turn the `ir` object into an anonymous function + +```julia +f = IRTools.func(ir) +LoggingProfiler.reset!() +f(nothing, 1.0, 1.0) +LoggingProfiler.to +``` + +where we can observe that our profiler is working as it should. But this is not yet our final goal. Originally, our goal was to recursivelly dive into the nested functions. IRTools offers a macro `@dynamo`, which is similar to `@generated` but simplifies our job by allowing to return the `IRTools.Inner.IR` object and it also taking care of properly renaming the arguments. With that we write + +```julia +using IRTools: @dynamo +profile_fun(f::Core.IntrinsicFunction, args...) = f(args...) +profile_fun(f::Core.Builtin, args...) = f(args...) + +@dynamo function profile_fun(f, args...) + ir = IRTools.Inner.IR(f, args...) + for (v, ex) in ir + if timable(ex.expr) + fname = exportname(ex.expr) + insert!(ir, v, xcall(LoggingProfiler, :record_start, fname)) + insertafter!(ir, v, xcall(LoggingProfiler, :record_end, fname)) + end + end + for (x, st) in ir + recursable(st.expr) || continue + ir[x] = xcall(profile_fun, st.expr.args...) 
+ end + return ir +end +``` + +where the first pass is as it was above and the `ir[x] = xcall(profile_fun, st.expr.args...)` ensures that the profiler will recursively call itself. `recursable` is a filter defined as below, which is used to prevent profiling itself (and possibly other things). + +```julia +recursable(gr::GlobalRef) = gr.name ∉ [:profile_fun, :record_start, :record_end] +recursable(ex::Expr) = ex.head == :call && recursable(ex.args[1]) +recursable(ex) = false +``` + +Additionally, the first two definitions of `profile_fun` for `Core.IntrinsicFunction` and for `Core.Builtin` prevent trying to dive into functions which do not have a Julia IR. And that's all. The full code is + +```@example lec09 +using IRTools +using IRTools: var, xcall, insert!, insertafter!, func, recurse!, @dynamo + +include("loggingprofiler.jl") +LoggingProfiler.resize!(LoggingProfiler.to, 10000) + +function timable(ex::Expr) + ex.head != :call && return(false) + length(ex.args) < 2 && return(false) + ex.args[1] isa Core.GlobalRef && return(true) + ex.args[1] isa Symbol && return(true) + return(false) +end +timable(ex) = false + +function recursable_fun(ex::GlobalRef) + ex.name ∈ (:profile_fun, :record_start, :record_end) && return(false) + iswhite(recursable_list, ex) && return(true) + isblack(recursable_list, ex) && return(false) + return(isempty(recursable_list) ? true : false) +end + +recursable_fun(ex::IRTools.Inner.Variable) = true + +function recursable(ex::Expr) + ex.head != :call && return(false) + isempty(ex.args) && return(false) + recursable(ex.args[1]) +end + +recursable(ex) = false + +exportname(ex::GlobalRef) = QuoteNode(ex.name) +exportname(ex::Symbol) = QuoteNode(ex) +exportname(ex::Expr) = exportname(ex.args[1]) +exportname(i::Int) = QuoteNode(Symbol("Int(",i,")")) + +profile_fun(f::Core.IntrinsicFunction, args...) = f(args...) +profile_fun(f::Core.Builtin, args...) = f(args...) + +@dynamo function profile_fun(f, args...) + ir = IRTools.Inner.IR(f, args...) 
+ for (v, ex) in ir + if timable(ex.expr) + fname = exportname(ex.expr) + insert!(ir, v, xcall(LoggingProfiler, :record_start, fname)) + insertafter!(ir, v, xcall(LoggingProfiler, :record_end, fname)) + end + end + for (x, st) in ir + recursable(st.expr) || continue + ir[x] = xcall(profile_fun, st.expr.args...) + end + # recurse!(ir) + return ir +end + +macro record(ex) + esc(Expr(:call, :profile_fun, ex.args...)) +end + +LoggingProfiler.reset!() +@record foo(1.0, 1.0) +LoggingProfiler.to +``` + +where you should notice the long time the first execution of `@record foo(1.0, 1.0)` takes. This is caused by the compiler specializing for every function into which we dive into. The second execution of `@record foo(1.0, 1.0)` is fast. It is also interesting to observe how the time of the compilation is logged by the profiler. The output of the profiler `to` is not shown here due to the length of the output. + +## Petite Zygote + +`IRTools.jl` were created for `Zygote.jl` --- Julia's source-to-source AD system currently powering `Flux.jl`. An interesting aspect of `Zygote` was to recognize that TensorFlow is in its nutshell a compiler, PyTorch is an interpreter. So the idea was to let Julia's compiler compile the gradient and perform optimizations that are normally performed with normal code. Recall that a lot of research went into how to generate efficient code and it is reasonable to use this research. `Zygote.jl` provides mainly reversediff, but there was an experimental support for forwarddiff. + +One of the questions when developing an AD engine is where and how to create a computation graph. Recall that in TensorFlow, you specify it through a domain specific language, in PyTorch it generated on the fly. Mike Innes' idea was use SSA form provided by the julia compiler. 
+ +```julia +julia> @code_lowered foo(1.0, 1.0) +CodeInfo( +1 ─ z = x * y +│ %2 = z +│ %3 = Main.sin(y) +│ %4 = %2 + %3 +└── return %4 +) +``` + +It is very easy to differentiate each line, as they correspond to single expressions (or function calls) and importantly, each variable is assigned exactly once. The strategy to use it for AD would as follows. + +### Strategy + +We assume to have a set of AD rules (e.g. ChainRules), which for a given function returns its evaluation and pullback. If `Zygote.jl` is tasked with computing the gradient. + +1. If a rule exists for this function, directly return the rule. +2. If not, deconstruct the function into a sequence of functions using `CodeInfo` / IR representation +3. Replace statements by calls to obtain the evaluation of the statements and the pullback. +4. Chain pullbacks in reverse order. +5. Return the function evaluation and the chained pullback. + +### Simplified implementation + +The following code is adapted from [this example](https://github.com/FluxML/IRTools.jl/blob/master/examples/reverse.jl) + +```julia +using IRTools, ChainRules +using IRTools: @dynamo, IR, Pipe, finish, substitute, return!, block, blocks, + returnvalue, arguments, isexpr, xcall, self, stmt + +struct Pullback{S,T} + data::T +end + +Pullback{S}(data) where S = Pullback{S,typeof(data)}(data) + +function primal(ir, T = Any) + pr = Pipe(ir) + calls = [] + ret = [] + for (v, st) in pr + ex = st.expr + if isexpr(ex, :call) + t = insert!(pr, v, stmt(xcall(Main, :forward, ex.args...), line = st.line)) + pr[v] = xcall(:getindex, t, 1) + J = push!(pr, xcall(:getindex, t, 2)) + push!(calls, v) + push!(ret, J) + end + end + pb = Expr(:call, Pullback{T}, xcall(:tuple, ret...)) + return!(pr, xcall(:tuple, returnvalue(block(ir, 1)), pb)) + return finish(pr), calls +end + +@dynamo function forward(m...) + ir = IR(m...) 
+ ir == nothing && return :(error("Non-differentiable function ", repr(args[1]))) + length(blocks(ir)) == 1 || error("control flow is not supported") + return primal(ir, Tuple{m...})[1] +end +``` + +where + +- the generated function `forward` calls `primal` to perform AD manual chainrule +- actual chainrule is performed in the for loop +- every function call is replaced `xcall(Main, :forward, ex.args...)`, which is the recursion we have observed above. `stmt` allows to insert information about lines in the source code). +- the output of the forward is the value of the function, and *pullback*, the function calculating gradient with respect to its inputs. +- `pr[v] = xcall(:getindex, t, 1)` fixes the output of the overwritten function call to be the output of `forward(...)` +- the next line logs the *pullback* +- `Expr(:call, Pullback{T}, xcall(:tuple, ret...))` will serve to call generated function which will assemble the pullback in the right order + +Let's now observe how the the IR of `foo` is transformed + +```julia +ir = IR(typeof(foo), Float64, Float64) +julia> primal(ir)[1] +1: (%1, %2, %3) + %4 = Main.forward(Main.:*, %2, %3) + %5 = Base.getindex(%4, 1) + %6 = Base.getindex(%4, 2) + %7 = Main.forward(Main.sin, %3) + %8 = Base.getindex(%7, 1) + %9 = Base.getindex(%7, 2) + %10 = Main.forward(Main.:+, %5, %8) + %11 = Base.getindex(%10, 1) + %12 = Base.getindex(%10, 2) + %13 = Base.tuple(%6, %9, %12) + %14 = (Pullback{Any, T} where T)(%13) + %15 = Base.tuple(%11, %14) + return %15 +``` + +- Every function call was transformed into the sequence of `forward(...)` and obtaining first and second item from the returned typle. +- Line `%14` constructs the `Pullback`, which (as will be seen shortly below) will allow to generate the pullback for the generated function +- Line `%15` generates the returned tuple, where the first item is the function value (computed at line `%11`) and pullback (constructed at libe `%15`). 
+ +We define few AD rules by specializing `forward` with calls from `ChainRules` + +```julia +forward(::typeof(sin), x) = ChainRules.rrule(sin, x) +forward(::typeof(*), x, y) = ChainRules.rrule(*, x, y) +forward(::typeof(+), x, y) = ChainRules.rrule(+, x, y) +``` + +Zygote implements this inside the generated function, such that whatever is added to `ChainRules` is automatically reflected. The process is not as trivial (see [`has_chain_rule`](https://github.com/FluxML/Zygote.jl/blob/master/src/compiler/chainrules.jl)) and for the brevity is not shown here. + +We now obtain the value and the pullback of function `foo` as + +```julia +julia> v, pb = forward(foo, 1.0, 1.0); +``` + +- The pullback contains in `data` field with individual jacobians that have been collected in `ret` in `primal` function. +```julia +pb.data[1] +pb.data[2] +pb.data[3] +``` + +The function for which the Jacobian has been created is stored in type parameter `S` of the `Pullback` type. The pullback for `foo` is generated in another generated function, as `Pullback` `struct` is a functor. This is an interesting **design pattern**, which allows us to return *closure* from a generated function. + +Let's now investigate the code generating code for pullback. + +```julia +_sum() = 0 +_sum(x) = x +_sum(x...) = xcall(:+, x...) + +function pullback(pr) + ir = empty(pr) + grads = Dict() + grad(x) = _sum(get(grads, x, [])...) 
+ grad(x, x̄) = push!(get!(grads, x, []), x̄) + grad(returnvalue(block(pr, 1)), IRTools.argument!(ir)) + data = push!(ir, xcall(:getfield, self, QuoteNode(:data))) + _, pbs = primal(pr) + pbs = Dict(pbs[i] => push!(ir, xcall(:getindex, data, i)) for i = 1:length(pbs)) + for v in reverse(keys(pr)) + ex = pr[v].expr + isexpr(ex, :call) || continue + Δs = push!(ir, Expr(:call, pbs[v], grad(v))) + for (i, x) in enumerate(ex.args) + grad(x, push!(ir, xcall(:getindex, Δs, i))) + end + end + return!(ir, xcall(:tuple, [grad(x) for x in arguments(pr)]...)) +end + +@dynamo function (pb::Pullback{S})(Δ) where S + return pullback(IR(S.parameters...)) +end +``` + +Let's walk how the reverse is constructed for `pr = IR(typeof(foo), Float64, Float64)` + +```julia +ir = empty(pr) +grads = Dict() +grad(x) = _sum(get(grads, x, [])...) +grad(x, x̄) = push!(get!(grads, x, []), x̄) +``` + +construct the empty `ir` for the constructed pullback, defines `Dict` where individual contributors of the gradient with respect to certain variable will be stored, and two function for pushing statements to to `grads`. The next statement + +```julia +grad(returnvalue(block(pr, 1)), IRTools.argument!(ir)) +``` + +pushes to `grads` statement that the gradient of the output of the primal `pr` is provided as an argument of the pullback `IRTools.argument!(ir)`. + +```julia +data = push!(ir, xcall(:getfield, self, QuoteNode(:data))) +_, pbs = primal(pr) +pbs = Dict(pbs[i] => push!(ir, xcall(:getindex, data, i)) for i = 1:length(pbs)) +``` + +sets `data` to the `data` field of the `Pullback` structure containing pullback functions. Then it create a dictionary `pbs`, where the output of each call in the primal (identified by the line) is mapped to the corresponding pullback, which is now a line in the IR representation. 
+The IR so far looks as + +```julia +1: (%1) + %2 = Base.getfield(IRTools.Inner.Self(), :data) + %3 = Base.getindex(%2, 1) + %4 = Base.getindex(%2, 2) + %5 = Base.getindex(%2, 3) +``` + +and `pbs` contains + +```julia +julia> pbs +Dict{IRTools.Inner.Variable, IRTools.Inner.Variable} with 3 entries: + %6 => %5 + %4 => %3 + %5 => %4 +``` + +says that the pullback of a function producing variable at line `%6` in the primal is stored at variable `%5` in the contructed pullback. +The real deal comes in the for loop + +```julia +for v in reverse(keys(pr)) + ex = pr[v].expr + isexpr(ex, :call) || continue + Δs = push!(ir, Expr(:call, pbs[v], grad(v))) + for (i, x) in enumerate(ex.args) + grad(x, push!(ir, xcall(:getindex, Δs, i))) + end +end +``` + +which iterates the primal `pr` in the reverse order and for every call, it inserts statement to calls the appropriate pullback `Δs = push!(ir, Expr(:call, pbs[v], grad(v)))` and adds gradients with respect to the inputs to values accumulating corresponding gradient in the loop `for (i, x) in enumerate(ex.args) ...` +The last line + +```julia +return!(ir, xcall(:tuple, [grad(x) for x in arguments(pr)]...)) +``` + +puts statements accumulating gradients with respect to individual variables to the ir. 
+ +The final generated IR code looks as + +```julia +julia> pullback(IR(typeof(foo), Float64, Float64)) +1: (%1) + %2 = Base.getfield(IRTools.Inner.Self(), :data) + %3 = Base.getindex(%2, 1) + %4 = Base.getindex(%2, 2) + %5 = Base.getindex(%2, 3) + %6 = (%5)(%1) + %7 = Base.getindex(%6, 1) + %8 = Base.getindex(%6, 2) + %9 = Base.getindex(%6, 3) + %10 = (%4)(%9) + %11 = Base.getindex(%10, 1) + %12 = Base.getindex(%10, 2) + %13 = (%3)(%8) + %14 = Base.getindex(%13, 1) + %15 = Base.getindex(%13, 2) + %16 = Base.getindex(%13, 3) + %17 = %12 + %16 + %18 = Base.tuple(0, %15, %17) + return %18 +``` + +and it calculates the gradient with respect to the input as + +```julia +julia> pb(1.0) +(0, 1.0, 1.5403023058681398) +``` + +where the first item is gradient with parameters of the function itself. + +## Conclusion + +The above examples served to demonstrate that `@generated` functions offers extremely powerful paradigm, especially if coupled with manipulation of intermediate representation. Within few lines of code, we have implemented reasonably powerful profiler and reverse AD engine. Importantly, it has been done without a single-purpose engine or tooling. 
+ \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_09/loggingprofiler.jl b/docs_vitepress/src/lectures/lecture_09/loggingprofiler.jl new file mode 100644 index 00000000..a557d193 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/loggingprofiler.jl @@ -0,0 +1,69 @@ +module LoggingProfiler +struct Calls + stamps::Vector{Float64} # contains the time stamps + event::Vector{Symbol} # name of the function that is being recorded + startstop::Vector{Symbol} # if the time stamp corresponds to start or to stop + i::Ref{Int} +end + +function Calls(n::Int) + Calls(Vector{Float64}(undef, n+1), Vector{Symbol}(undef, n+1), Vector{Symbol}(undef, n+1), Ref{Int}(0)) +end + +function Base.show(io::IO, calls::Calls) + offset = 0 + if calls.i[] >= length(calls.stamps) + @warn "The recording buffer was too small, consider increasing it" + end + for i in 1:min(calls.i[], length(calls.stamps)) + offset -= calls.startstop[i] == :stop + foreach(_ -> print(io, " "), 1:max(offset, 0)) + rel_time = calls.stamps[i] - calls.stamps[1] + println(io, calls.event[i], ": ", rel_time) + offset += calls.startstop[i] == :start + end +end + +global const to = Calls(100) + +""" + record_start(ev::Symbol) + + record the start of the event, the time stamp is recorded after all counters are + appropriately increased +""" +record_start(ev::Symbol) = record_start(to, ev) +function record_start(calls, ev::Symbol) + n = calls.i[] = calls.i[] + 1 + n > length(calls.stamps) && return + calls.event[n] = ev + calls.startstop[n] = :start + calls.stamps[n] = time_ns() +end + +""" + record_end(ev::Symbol) + + record the end of the event, the time stamp is recorded before all counters are + appropriately increased +""" +record_end(ev::Symbol) = record_end(to, ev::Symbol) +function record_end(calls, ev::Symbol) + t = time_ns() + n = calls.i[] = calls.i[] + 1 + n > length(calls.stamps) && return + calls.event[n] = ev + calls.startstop[n] = :stop + calls.stamps[n] = t +end + +reset!() = 
to.i[] = 0 + +function Base.resize!(calls::Calls, n::Integer) + resize!(calls.stamps, n) + resize!(calls.event, n) + resize!(calls.startstop, n) +end + +end + diff --git a/docs_vitepress/src/lectures/lecture_09/petite_diffractor.jl b/docs_vitepress/src/lectures/lecture_09/petite_diffractor.jl new file mode 100644 index 00000000..509fbbaa --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/petite_diffractor.jl @@ -0,0 +1,261 @@ +# A simple reverse-mode AD. +# Lots of simplifications have been made (in particular, there is no support for +# control flow). But this illustrates most of the principles behind Zygote. +# https://fluxml.ai/Zygote.jl/dev/internals/ + + +##### + +# We assume to have a set of AD rules (e.g. ChainRules), which for a given function returns its evaluation and pullback. If we are tasked with computing the gradient. + +# 1. If a rule exists for this function, directly return the rule. +# 2. If not, deconstruct the function into a sequence of functions by asking f `IRCode` +# 3. Replace statements by calls to obtain the evaluation of the statements and the pullback. +# 4. Chain pullbacks in reverse order. +# 5. Return the function evaluation and the chained pullback. + +# The idea is that we will replace each statement of `foo` with a statement returning the function value and pullback. At the moment and for simplicity, we assume that appropriate chain is defined. Moreover, we need to keep track of mapping old SSAValues to new SSAValues in ssamap, since their values will differ. 
import Core.Compiler as CC
using ChainRules
using Core: SSAValue, GlobalRef, ReturnNode, Argument

"""
    get_ciir(f, sig; world, optimize_until)

Return `(ci, ir, rt)`: a copy of the uncompressed `CodeInfo` of the method of
`f` matching the signature `sig`, the corresponding `IRCode`, and the inferred
return type `rt`.
"""
function get_ciir(f, sig; world = Core.Compiler.get_world_counter(), optimize_until = "compact 1")
    mi = only(Base.method_instances(f, sig, world))
    ci = Base.uncompressed_ir(mi.def::Method)
    (ir, rt) = only(Base.code_ircode(f, sig; optimize_until))
    (copy(ci), ir, rt)
end

# Carrier for the pullbacks recorded during the forward pass. `S` encodes the
# signature of the differentiated call; `T` is the type of the pullbacks tuple.
struct Pullback{S,T}
    pullbacks::T
end

Pullback{S}(pullbacks) where S = Pullback{S,typeof(pullbacks)}(pullbacks)

# Run-time type of an IR operand.
argtype(ir::CC.IRCode, a::Core.Argument) = ir.argtypes[a.n]
argtype(ir::CC.IRCode, a::Core.SSAValue) = ir.stmts.type[a.id]
# resolving the binding directly is better than `typeof(eval(f))`
argtype(ir::CC.IRCode, f::GlobalRef) = typeof(getproperty(f.mod, f.name))
argtype(ir::CC.IRCode, a) = error("argtype of $(typeof(a)) not supported")

"""
    type_of_pullback(ir, inst, optimize_until = "compact 1")

Infer the return type of `ChainRules.rrule` applied to the arguments of the
`:call` expression `inst`. The result should be a `Tuple{primal, pullback}`
type; an error is raised otherwise.
"""
function type_of_pullback(ir, inst, optimize_until = "compact 1")
    # bug fix: the original error message was garbled and stated the opposite
    inst.head != :call && error("inferring the return type is only supported for `:call` expressions")
    params = tuple([argtype(ir, a) for a in inst.args]...)
    # `_` avoids shadowing the `ir` parameter, which was confusing in the original
    (_, rt) = only(Base.code_ircode(ChainRules.rrule, params; optimize_until))
    if !(rt <: Tuple{A,B} where {A,B})
        error("The return type of pullback `ChainRules.rrule($(params))` should be a tuple")
    end
    rt
end

# Substitute old SSA values with their new counterparts recorded in `d`.
remap(d, args::Tuple) = map(a -> remap(d, a), args)
remap(d, args::Vector) = map(a -> remap(d, a), args)
remap(d, r::ReturnNode) = ReturnNode(remap(d, r.val))
remap(d, x::SSAValue) = d[x]
remap(d, x) = x

function forward(ir, S)
    pullbacks = []
    new_insts = Any[]
    new_line = Int32[]
    new_types = Any[]
    ssamap = Dict{SSAValue,SSAValue}()
    fval_ssa = nothing
    for (i, stmt) in enumerate(ir.stmts)
        inst = stmt[:inst]
        if inst isa Expr && inst.head == :call
            new_inst = Expr(:call, GlobalRef(ChainRules, :rrule), remap(ssamap, inst.args)...)
# (continuation of `forward`: each primal statement becomes an `rrule` call;
# the primal value and the pullback are extracted from its result)
            tt = type_of_pullback(ir, inst)
            push!(new_insts, new_inst)
            push!(new_line, stmt[:line])
            push!(new_types, tt)
            rrule_ssa = SSAValue(length(new_insts))

            # primal value = rrule(...)[1]
            push!(new_insts, Expr(:call, :getindex, rrule_ssa, 1))
            push!(new_line, stmt[:line])
            push!(new_types, tt.parameters[1])
            val_ssa = SSAValue(length(new_insts))
            ssamap[SSAValue(i)] = val_ssa
            (stmt[:type] != tt.parameters[1]) && @info("pullback of $(inst) has a different type than normal inst")

            # pullback = rrule(...)[2]
            push!(new_insts, Expr(:call, :getindex, rrule_ssa, 2))
            pullback_ssa = SSAValue(length(new_insts))
            push!(new_line, stmt[:line])
            push!(new_types, tt.parameters[2])
            push!(pullbacks, pullback_ssa)
            continue
        end

        if inst isa ReturnNode
            fval_ssa = remap(ssamap, inst.val)
            continue
        end
        error("unknown node $(i)")
    end

    # construct a tuple holding all pullbacks
    push!(new_insts, Expr(:call, :tuple, pullbacks...))
    pull_ssa = SSAValue(length(new_insts))
    push!(new_line, new_line[end])
    push!(new_types, Tuple{[new_types[x.id] for x in pullbacks]...})

    # construct the Pullback carrier.
    # bug fix: the original built this Expr into a local `pb` but never pushed
    # it into `new_insts`, so `calltype_ssa` below was undefined and `forward`
    # raised an UndefVarError
    push!(new_insts, Expr(:call, Pullback{S}, pull_ssa))
    calltype_ssa = SSAValue(length(new_insts))
    push!(new_line, new_line[end])
    push!(new_types, Pullback{S,new_types[pull_ssa.id]})

    # construct the tuple containing the forward value and the pullback carrier
    push!(new_insts, Expr(:call, :tuple, fval_ssa, calltype_ssa))
    ret_ssa = SSAValue(length(new_insts))
    push!(new_line, new_line[end])
    push!(new_types, Tuple{new_types[fval_ssa.id], new_types[calltype_ssa.id]})

    # put a return statement
    push!(new_insts, ReturnNode(ret_ssa))
    push!(new_line, new_line[end])
    push!(new_types, Any)

    # assemble the IRCode; most of the type information here is a placeholder
    is = CC.InstructionStream(
        new_insts,                          # inst::Vector{Any}
        new_types,                          # type::Vector{Any}
        fill(CC.NoCallInfo(),
length(new_insts)), # info::Vector{CallInfo} + new_line, # line::Vector{Int32} + fill(UInt8(0), length(new_insts)), # flag::Vector{UInt8} + ) + cfg = CC.compute_basic_blocks(new_insts) + new_ir = CC.IRCode(is, cfg, ir.linetable, ir.argtypes, ir.meta, ir.sptypes) +end + + + +function play(ir) + pullbacks = [] + new_insts = Any[] + new_line = Int32[] + new_types = Any[] + + push!(new_insts, Expr(:call, GlobalRef(Core, :typeof), Argument(1))) + # push!(new_insts, Argument(1)) + ret_ssa = SSAValue(length(new_insts)) + push!(new_line, ir.stmts[1][:line]) + push!(new_types, Any) + + + # put a return statement + push!(new_insts, ReturnNode(ret_ssa)) + push!(new_line, new_line[end]) + push!(new_types, Any) + + # this nightmare construct the IRCode with absolutely useless type information + is = CC.InstructionStream( + new_insts, # inst::Vector{Any} + new_types, # type::Vector{Any} + fill(CC.NoCallInfo(), length(new_insts)), # info::Vector{CallInfo} + new_line, # line::Vector{Int32} + fill(UInt8(0), length(new_insts)), # flag::Vector{UInt8} + ) + cfg = CC.compute_basic_blocks(new_insts) + new_ir = CC.IRCode(is, cfg, ir.linetable, ir.argtypes, ir.meta, ir.sptypes) +end + + +function demo() + + function foo(x,y) + z = x * y + z + sin(x) + end + + + (ci, ir, rt) = get_ciir(foo, (Float64, Float64)) + new_ir = forward(ir) + CC.replace_code_newstyle!(ci, ir) + + forw = Core.OpaqueClosure(forward(ir)) + fval, pullbacks = forw(1.0,1.0) +end + +# S = Tuple{foo, Float64, Float64} +# T = typeof(pullbacks) +# @macroexpand @code_typed Pullback{S,T}(pullbacks) +# (ci, ir, rt) = get_ciir(Pullback{S,T}, T) + +# julia> dump(e) +# Expr +# head: Symbol new +# args: Array{Any}((2,)) +# 1: Core.Argument +# n: Int64 1 +# 2: Core.Argument +# n: Int64 2 + +# julia> +# (1.0,1.0) + + +""" + function reverse(ir) + + we construct the reverse using the original `ir` code, since we can obtain it in from the + parameter `S` of the Pullback{S,T}. 
`S` may contain e.g. `(typeof(foo), Float64, Float64)` when we compute the
gradient of `foo`.
"""
function reverse(ir)
    # diffmap maps a primal SSA value / argument to the SSA value (in the code
    # being built) that holds its accumulated gradient
    diffmap = Dict{Any,Any}()

    # the argument of the pullback being defined is the gradient with respect
    # to the returned value; we assume the return is the last instruction
    @assert ir.stmts.inst[end] isa ReturnNode
    diffmap[ir.stmts.inst[end].val] = Core.Argument(2)

    reverse_inst = []

    # the first instruction fetches the tuple of pullbacks stored in the
    # `Pullback` struct passed as the first argument
    push!(reverse_inst, Expr(:call, GlobalRef(Core, :getfield), Core.Argument(1), :pullbacks))
    pullbacks_ssa = SSAValue(length(reverse_inst))

    # we should filter statements for which pullbacks exist, but for a trivial
    # function without control flow this is not needed

    # iterate over the pullbacks in reverse order and invoke each with the
    # gradient of its output
    for i in (length(ir.stmts)-1):-1:1
        inst = ir.stmts[i][:inst]
        val_ssa = SSAValue(i)

        # first, fetch the pullback
        push!(reverse_inst, Expr(:call, GlobalRef(Base, :getindex), pullbacks_ssa, i))
        pullback_ssa = SSAValue(length(reverse_inst))

        # execute the pullback
        push!(reverse_inst, Expr(:call, pullback_ssa, diffmap[val_ssa]))
        arg_grad = SSAValue(length(reverse_inst))
        for (j, a) in enumerate(inst.args)
            j == 1 && continue # omit the gradient w.r.t. the function itself
            # extract the gradient of the j-th argument (the getindex push was
            # duplicated in both branches of the original; hoisted here) …
            push!(reverse_inst, Expr(:call, GlobalRef(Base, :getindex), arg_grad, j))
            sv = SSAValue(length(reverse_inst))
            if haskey(diffmap, a)
                # … and accumulate it when a partial gradient already exists
                push!(reverse_inst, Expr(:call, GlobalRef(Base, :+), sv, diffmap[a]))
                diffmap[a] = SSAValue(length(reverse_inst))
            else
                diffmap[a] = sv
            end
        end
    end

    # bug fix: the original ended with the `for` loop and thus returned
    # `nothing`, silently discarding all built instructions; return them
    # together with the gradient map so a caller can assemble the IRCode
    return reverse_inst, diffmap
end
diff --git a/docs_vitepress/src/lectures/lecture_09/petite_zygote.jl
b/docs_vitepress/src/lectures/lecture_09/petite_zygote.jl new file mode 100644 index 00000000..6ec1487a --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/petite_zygote.jl @@ -0,0 +1,82 @@ +# A simple reverse-mode AD. +# Lots of simplifications have been made (in particular, there is no support for +# control flow). But this illustrates most of the principles behind Zygote. +# https://fluxml.ai/Zygote.jl/dev/internals/ + +using IRTools +using IRTools: @dynamo, IR, Pipe, finish, substitute, return!, block, blocks, + returnvalue, arguments, isexpr, xcall, self, stmt + +struct Pullback{S,T} + data::T +end + +Pullback{S}(data) where S = Pullback{S,typeof(data)}(data) + +function primal(ir, T = Any) + pr = Pipe(ir) + calls = [] + ret = [] + for (v, st) in pr + ex = st.expr + if isexpr(ex, :call) + t = insert!(pr, v, stmt(xcall(Main, :forward, ex.args...), line = st.line)) + pr[v] = xcall(:getindex, t, 1) + J = push!(pr, xcall(:getindex, t, 2)) + push!(calls, v) + push!(ret, J) + end + end + pb = Expr(:call, Pullback{T}, xcall(:tuple, ret...)) + return!(pr, xcall(:tuple, returnvalue(block(ir, 1)), pb)) + return finish(pr), calls +end + +_sum() = 0 +_sum(x) = x +_sum(x...) = xcall(:+, x...) + +function adjoint(pr) + ir = empty(pr) + grads = Dict() + grad(x) = _sum(get(grads, x, [])...) + grad(x, x̄) = push!(get!(grads, x, []), x̄) + grad(returnvalue(block(pr, 1)), IRTools.argument!(ir)) + data = push!(ir, xcall(:getfield, self, QuoteNode(:data))) + _, pbs = primal(pr) + pbs = Dict(pbs[i] => push!(ir, xcall(:getindex, data, i)) for i = 1:length(pbs)) + + for v in reverse(keys(pr)) + ex = pr[v].expr + isexpr(ex, :call) || continue + Δs = push!(ir, Expr(:call, pbs[v], grad(v))) + for (i, x) in enumerate(ex.args) + grad(x, push!(ir, xcall(:getindex, Δs, i))) + end + end + return!(ir, xcall(:tuple, [grad(x) for x in arguments(pr)]...)) +end + +@dynamo function forward(m...) + ir = IR(m...) 
+ ir == nothing && return :(error("Non-differentiable function ", repr(args[1]))) + length(blocks(ir)) == 1 || error("control flow is not supported") + return primal(ir, Tuple{m...})[1] +end + +@dynamo function (pb::Pullback{S})(Δ) where S + return adjoint(IR(S.parameters...)) +end + +forward(::typeof(sin), x) = sin(x), ȳ -> (nothing, ȳ*cos(x)) +forward(::typeof(cos), x) = cos(x), ȳ -> (nothing, -ȳ*sin(x)) + +gradient(f, x...) = Base.tail(forward(f, x...)[2](1)) + +foo(x) = sin(cos(x)) + +# ir = @code_ir foo(1.0) +# primal(ir)[1] +# adjoint(ir) + +gradient(foo, 1.0) diff --git a/docs_vitepress/src/lectures/lecture_09/timer.jl b/docs_vitepress/src/lectures/lecture_09/timer.jl new file mode 100644 index 00000000..314d8b89 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_09/timer.jl @@ -0,0 +1,118 @@ +# Generated functions +using Dictionaries +function retrieve_code_info(sigtypes, world = Base.get_world_counter()) + S = Tuple{map(s -> Core.Compiler.has_free_typevars(s) ? typeof(s.parameters[1]) : s, sigtypes)...} + _methods = Base._methods_by_ftype(S, -1, world) + isempty(_methods) && @error("method $(sigtypes) does not exist, may-be run it once") + type_signature, raw_static_params, method = _methods[1] # method is the same as we would get by invoking methods(+, (Int, Int)).ms[1] + + # this provides us with the CodeInfo + method_instance = Core.Compiler.specialize_method(method, type_signature, raw_static_params, false) + code_info = Core.Compiler.retrieve_code_info(method_instance) +end + +struct Calls + stamps::Vector{Float64} # contains the time stamps + event::Vector{Symbol} # name of the function that is being recorded + startstop::Vector{Symbol} # if the time stamp corresponds to start or to stop + i::Ref{Int} +end + +function Calls(n::Int) + Calls(Vector{Float64}(undef, n+1), Vector{Symbol}(undef, n+1), Vector{Symbol}(undef, n+1), Ref{Int}(0)) +end + +global const to = Calls(100) + +function Base.show(io::IO, calls::Calls) + for i in 1:calls.i[] + 
println(io, calls.stamps[i] - calls.stamps[1]," ", calls.startstop[i]," ",calls.event[i]) + end +end + +function Base.push!(calls::Calls, s::Symbol, ev::Symbol) + n = calls.i[] = calls.i[] + 1 + n > length(calls.stamps) && return + calls.event[n] = ev + calls.startstop[n] = s + calls.stamps[n] = time() +end + +reset!(calls::Calls) = calls.i[] = 0 + +struct Context{T<:Union{Nothing, Vector{Symbol}}} + functions::T +end +Context() = Context(nothing) + +overdubbable(ex::Expr) = false +overdubbable(ex) = false +timable(ex::Expr) = ex.head == :call +timable(ex) = false + +rename_args(ex, slot_vars, ssa_vars) = ex +rename_args(ex::Expr, slot_vars, ssa_vars) = Expr(ex.head, rename_args(ex.args, slot_vars, ssa_vars)...) +rename_args(args::AbstractArray, slot_vars, ssa_vars) = map(a -> rename_args(a, slot_vars, ssa_vars), args) +rename_args(r::Core.ReturnNode, slot_vars, ssa_vars) = Core.ReturnNode(rename_args(r.val, slot_vars, ssa_vars)) +rename_args(a::Core.SlotNumber, slot_vars, ssa_vars) = slot_vars[a.id] +rename_args(a::Core.SSAValue, slot_vars, ssa_vars) = ssa_vars[a.id] + +assigned_vars(ex) = [] +assigned_vars(ex::Expr) = assigned_vars(ex.args) +assigned_vars(args::AbstractArray) = mapreduce(assigned_vars, vcat, args) +assigned_vars(r::Core.ReturnNode) = assigned_vars(r.val) +assigned_vars(a::Core.SlotNumber) = [] +assigned_vars(a::Core.SSAValue) = [a.id] + +exportname(ex::GlobalRef) = ex.name +exportname(ex::Expr) = ex.args[1] + +overdub(ctx::Context, f::Core.IntrinsicFunction, args...) = f(args...) + +@generated function overdub(ctx::Context, f::F, args...) 
where {F} + ci = retrieve_code_info((F, args...)) + slot_vars = Dict(enumerate(ci.slotnames)) + # ssa_vars = Dict(i => gensym(:left) for i in 1:length(ci.code)) + ssa_vars = Dict(i => Symbol(:L, i) for i in 1:length(ci.code)) + used = assigned_vars(ci.code) |> distinct + exprs = [] + for i in 1:length(args) + push!(exprs, Expr(:(=), ci.slotnames[i+1], :(args[$(i)]))) + end + for (i, ex) in enumerate(ci.code) + ex = rename_args(ex, slot_vars, ssa_vars) + if ex isa Core.ReturnNode + push!(exprs, Expr(:return, ex.val)) + continue + end + if timable(ex) + fname = exportname(ex) + fname = :(Symbol($(fname))) + push!(exprs, Expr(:call, :push!, :to, :(:start), fname)) + ex = overdubbable(ex) ? Expr(:call, :overdub, :ctx, ex.args...) : ex + ex = i ∈ used ? Expr(:(=) , ssa_vars[i], ex) : ex + push!(exprs, ex) + push!(exprs, Expr(:call, :push!, :to, :(:stop), fname)) + else + ex = i ∈ used ? Expr(:(=) , ssa_vars[i], ex) : ex + push!(exprs, ex) + end + end + r = Expr(:block, exprs...) + @show r + # println(" ") + r +end + + +function foo(x, y) + z = x * y + z + sin(y) +end +reset!(to) +overdub(Context(), foo, 1.0, 1.0) +to + +reset!(to) +overdub(ctx, Base.Math.sin_kernel, 1.0) + diff --git a/docs_vitepress/src/lectures/lecture_10/42-years-processor-trend.png b/docs_vitepress/src/lectures/lecture_10/42-years-processor-trend.png new file mode 100644 index 00000000..4915f23f Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_10/42-years-processor-trend.png differ diff --git a/docs_vitepress/src/lectures/lecture_10/exercise.jl b/docs_vitepress/src/lectures/lecture_10/exercise.jl new file mode 100644 index 00000000..957ef30f --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_10/exercise.jl @@ -0,0 +1,75 @@ +@everywhere begin + """ + sample_all_installed_pkgs(path::AbstractString) + + Returns root folders of all installed packages in the system. Package version is sampled. 
+ """ + function sample_all_installed_pkgs(path::AbstractString) + pkgs = readdir(path) + # [rand(readdir(joinpath(path, p), join=true)) for p in pkgs] # sampling version + [readdir(joinpath(path, p), join=true)[1] for p in pkgs if isdir(joinpath(path, p))] # deterministic version + end + + """ + filter_jl(path) + + Recursively walks the directory structure to obtain all `.jl` files. + """ + filter_jl(path) = reduce(vcat, joinpath.(rootpath, filter(endswith(".jl"), files)) for (rootpath, dirs, files) in walkdir(path)) + + """ + tokenize(jl_path) + + Parses a ".jl" file located at `jl_path` and extracts all symbols and expression heads from the extracted AST. + """ + function tokenize(jl_path) + _extract_symbols(x) = Symbol[] + _extract_symbols(x::Symbol) = [x] + function _extract_symbols(x::Expr) + if length(x.args) > 0 + Symbol.(vcat(x.head, reduce(vcat, _extract_symbols(arg) for arg in x.args))) + else + Symbol[] + end + end + + scode = "begin\n" * read(jl_path, String) * "end\n" + try + code = Meta.parse(scode) + _extract_symbols(code) + catch e + if ~isa(e, Meta.ParseError) + rethrow(e) + end + Symbol[] + end + end + + + function histtokens!(h, filename::AbstractString) + for t in tokenize(filename) + h[t] = get(h, t, 0) + 1 + end + h + end + + function dohistogram(chnl) + h = Dict{Symbol, Int}() + while isready(chnl) + f = take!(chnl) + histtokens!(h, f) + end + return(h) + end +end + +chnl = RemoteChannel() do + Channel(typemax(Int)) do ch + for package in sample_all_installed_pkgs("/Users/tomas.pevny/.julia/packages") + foreach(c -> put!(ch, c), filter_jl(package)) + end + end +end + +mapreduce(fetch, mergewith(+), [@spawnat i dohistogram(chnl) for i in workers()]) + diff --git a/docs_vitepress/src/lectures/lecture_10/hw.md b/docs_vitepress/src/lectures/lecture_10/hw.md new file mode 100644 index 00000000..114ca00e --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_10/hw.md @@ -0,0 +1,43 @@ +# [Homework 9: Accelerating 1D convolution with threads](@id 
hw09)

## How to submit

Put all of your code inside `hw.jl`. Zip only this file (not its parent folder) and upload it to BRUTE. You should not import anything but `Base.Threads` or just `Threads`.

::: danger Homework (2 points)

Implement a *multithreaded* discrete 1D convolution operator[^1] without padding (the output will be shorter). The required function signature: `thread_conv1d(x, w)`, where `x` is the signal array and `w` the kernel. For testing the correctness of the implementation you can use the following example of a step function and its derivative realized by the kernel `[-1, 1]`:

```julia
using Test
@test all(thread_conv1d(vcat([0.0, 0.0, 1.0, 1.0, 0.0, 0.0]), [-1.0, 1.0]) .≈ [0.0, -1.0, 0.0, 1.0, 0.0])
```

[^1]: Discrete convolution with finite support [https://en.wikipedia.org/wiki/Convolution#Discrete\_convolution](https://en.wikipedia.org/wiki/Convolution#Discrete_convolution)

Your parallel implementation will be tested both in sequential and in two-threaded mode with the following inputs:

```julia
using Random
Random.seed!(42)
x = rand(10_000_000)
w = [1.0, 2.0, 4.0, 2.0, 1.0]
@btime thread_conv1d($x, $w);
```

On your local machine you should be able to achieve a `0.6x` reduction in execution time with two threads; however, the automatic eval system is a noisy environment and therefore we require only a `0.8x` reduction therein. This being said, please reach out to us if you encounter any issues.
+ +**HINTS**: + +- start with single threaded implementation +- don't forget to reverse the kernel +- `@threads` macro should be all you need +- for testing purposes create a simple script, that you can run with `julia -t 1` and `julia -t 2` + +::: + +::: details Show solution + +Nothing to see here + +::: \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_10/juliaset_p.jl b/docs_vitepress/src/lectures/lecture_10/juliaset_p.jl new file mode 100644 index 00000000..b19a7b3e --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_10/juliaset_p.jl @@ -0,0 +1,75 @@ +using Pkg +Pkg.activate(@__DIR__) +using GLMakie +using BenchmarkTools +using Distributed +using SharedArrays + +function juliaset_pixel(z₀, c) + z = z₀ + for i in 1:255 + abs2(z)> 4.0 && return (i - 1)%UInt8 + z = z*z + c + end + return UInt8(255) +end + +function juliaset_column!(img, c, n, colj, j) + x = -2.0 + (j-1)*4.0/(n-1) + for i in 1:n + y = -2.0 + (i-1)*4.0/(n-1) + @inbounds img[i, colj] = juliaset_pixel(x+im*y, c) + end + nothing +end + +function juliaset_columns(c, n, columns) + img = Array{UInt8,2}(undef, n, length(columns)) + for (colj, j) in enumerate(columns) + juliaset_column!(img, c, n, colj, j) + end + img +end + +function juliaset_distributed(x, y, partitions = nworkers(), n = 1000) + c = x + y*im + columns = Iterators.partition(1:n, div(n, partitions)) + slices = pmap(cols -> juliaset_columns(c, n, cols), columns) + reduce(hcat, slices) +end + +# @btime juliaset_distributed(-0.79, 0.15) + +# frac = juliaset_distributed(-0.79, 0.15) +# plot(heatmap(1:size(frac,1), 1:size(frac,2), frac, color=:Spectral)) + + +#### +# Let's work out the shared array approach +#### +function juliaset_column!(img, c, n, j) + x = -2.0 + (j-1)*4.0/(n-1) + for i in 1:n + y = -2.0 + (i-1)*4.0/(n-1) + @inbounds img[i, j] = juliaset_pixel(x+im*y, c) + end + nothing +end + +function juliaset_range!(img, c, n, columns) + for j in columns + juliaset_column!(img, c, n, j) + end + nothing +end 
+ +function juliaset_shared(x, y, partitions = nworkers(), n = 1000) + c = x + y*im + columns = Iterators.partition(1:n, div(n, partitions)) + img = SharedArray{UInt8,2}((n, n)) + slices = pmap(cols -> juliaset_range!(img, c, n, cols), columns) + img +end + +# juliaset_shared(-0.79, 0.15) +# juliaset_shared(-0.79, 0.15, 16) diff --git a/docs_vitepress/src/lectures/lecture_10/juliaset_t.jl b/docs_vitepress/src/lectures/lecture_10/juliaset_t.jl new file mode 100644 index 00000000..ebaadd99 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_10/juliaset_t.jl @@ -0,0 +1,77 @@ +using Plots, BenchmarkTools +Threads.nthreads() +function juliaset_pixel(z₀, c) + z = z₀ + for i in 1:255 + abs2(z)> 4.0 && return (i - 1)%UInt8 + z = z*z + c + end + return UInt8(255) +end + +function juliaset_column!(img, c, n, j) + x = -2.0 + (j-1)*4.0/(n-1) + for i in 1:n + y = -2.0 + (i-1)*4.0/(n-1) + @inbounds img[i,j] = juliaset_pixel(x+im*y, c) + end + nothing +end + +function juliaset_single!(img, c, n) + for j in 1:n + juliaset_column!(img, c, n, j) + end + nothing +end + +function juliaset(x, y, n=1000, method = juliaset_single!, extra...) + c = x + y*im + img = Array{UInt8,2}(undef,n,n) + method(img, c, n, extra...) 
+ return img +end + +# frac = juliaset(-0.79, 0.15) +# plot(heatmap(1:size(frac,1),1:size(frac,2), frac, color=:Spectral)) + + +@btime juliaset(-0.79, 0.15); + +function juliaset_static!(img, c, n) + Threads.@threads for j in 1:n + juliaset_column!(img, c, n, j) + end + nothing +end + +@btime juliaset(-0.79, 0.15, 1000, juliaset_static!); + + +using Folds +function juliaset_folds!(img, c, n) + Folds.foreach(j -> juliaset_column!(img, c, n, j), 1:n) + nothing +end +julia> @btime juliaset(-0.79, 0.15, 1000, juliaset_folds!); + 16.267 ms (25 allocations: 978.20 KiB) + +function juliaset_folds!(img, c, n, nt) + parts = collect(Iterators.partition(1:n, cld(n, nt))) + Folds.foreach(parts) do ii + foreach(j ->juliaset_column!(img, c, n, j), ii) + end + nothing +end +julia> @btime juliaset(-0.79, 0.15, 1000, (args...) -> juliaset_folds!(args..., 16)); + 16.716 ms (25 allocations: 978.61 KiB) + + +using FLoops, FoldsThreads +function juliaset_folds!(img, c, n) + @floop ThreadedEx(basesize = 2) for j in 1:n + juliaset_column!(img, c, n, j) + end + nothing +end +@btime juliaset(-0.79, 0.15, 1000, juliaset_folds!); \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_10/lab.md b/docs_vitepress/src/lectures/lecture_10/lab.md new file mode 100644 index 00000000..7ea523c0 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_10/lab.md @@ -0,0 +1,798 @@ +# [Lab 10: Parallel computing](@id parallel_lab) + +In this lab we are going to introduce tools that Julia's ecosystem offers for different ways of parallel computing. As an ilustration for how capable Julia was/is consider the fact that it has joined (alongside C,C++ and Fortran) the so-called "PetaFlop club"[^1], a list of languages capable of running at over 1PFLOPS. 
+ +[^1]: Blogpost "Julia Joins Petaflop Club" [https://juliacomputing.com/media/2017/09/julia-joins-petaflop-club/](https://juliacomputing.com/media/2017/09/julia-joins-petaflop-club/) + +## Introduction + +Nowadays there is no need to convince anyone about the advantages of having more cores available for your computation be it on a laptop, workstation or a cluster. The trend can be nicely illustrated in the figure bellow: + +![42-cpu-trend](./42-years-processor-trend.png) +Image source[^2] + +[^2]: Performance metrics trend of CPUs in the last 42years: [https://www.karlrupp.net/2018/02/42-years-of-microprocessor-trend-data/](https://www.karlrupp.net/2018/02/42-years-of-microprocessor-trend-data/) + +However there are some shortcomings when going from sequential programming, that we have to note + +- We don't think in parallel +- We learn to write and reason about programs serially +- The desire for parallelism often comes *after* you've written your algorithm (and found it too slow!) +- Harder to reason and therefore harder to debug +- The number of cores is increasing, thus knowing how the program scales is crucial (not just that it runs better) +- Benchmarking parallel code, that tries to exhaust the processor pool is much more affected by background processes + +::: tip Shortcomings of parallelism + +Parallel computing brings its own set of problems and not an insignificant overhead with data manipulation and communication, therefore try always to optimize your serial code as much as you can before advancing to parallel acceleration. + +::: + +::: tip Disclaimer + +With the increasing complexity of computer HW some statements may become outdated. Moreover we won't cover as many tips that you may encounter on a parallel programming specific course, which will teach you more in the direction of how to think in parallel, whereas here we will focus on the tools that you can use to realize the knowledge gained therein. 
+ +::: + +## Process based parallelism + +As the name suggest process based parallelism is builds on the concept of running code on multiple processes, which can run even on multiple machines thus allowing to scale computing from a local machine to a whole network of machines - a major difference from the other parallel concept of threads. In Julia this concept is supported within standard library `Distributed` and the scaling to cluster can be realized by 3rd party library [`ClusterManagers.jl`](https://github.com/JuliaParallel/ClusterManagers.jl). + +Let's start simply with knowing how to start up additional Julia processes. There are two ways: + +- by adding processes using cmd line argument `-p ##` + +```bash +julia -p 4 +``` + +- by adding processes after startup using the `addprocs(##)` function from std library `Distributed` + +```julia +julia> using Distributed + +julia> addprocs(4) # returns a list of ids of individual processes +4-element Vector{Int64}: + 2 + 3 + 4 + 5 + +julia> nworkers() # returns number of workers +4 + +julia> nprocs() # returns number of processes `nworkers() + 1` +5 +``` + +The result shown in a process manager such as `htop`: + +```bash +.../julia-1.6.2/bin/julia --project +.../julia-1.6.2/bin/julia -Cnative -J/home/honza/Apps/julia-1.6.2/lib/julia/sys.so -g1 --bind-to 127.0.0.1 --worker +.../julia-1.6.2/bin/julia -Cnative -J/home/honza/Apps/julia-1.6.2/lib/julia/sys.so -g1 --bind-to 127.0.0.1 --worker +.../julia-1.6.2/bin/julia -Cnative -J/home/honza/Apps/julia-1.6.2/lib/julia/sys.so -g1 --bind-to 127.0.0.1 --worker +.../julia-1.6.2/bin/julia -Cnative -J/home/honza/Apps/julia-1.6.2/lib/julia/sys.so -g1 --bind-to 127.0.0.1 --worker +``` + +Both of these result in total of 5 running processes - 1 controller, 4 workers - with their respective ids accessible via `myid()` function call. 
Note that the controller process has always id 1 and other processes are assigned subsequent integers, see for yourself with `@everywhere` macro, which runs easily code on all or a subset of processes. + +```julia +@everywhere println(myid()) +@everywhere [2,3] println(myid()) # select a subset of workers +``` + +The same way that we have added processes we can also remove them + +```julia +julia> workers() # returns array of worker ids +4-element Vector{Int64}: + 2 + 3 + 4 + 5 + +julia> rmprocs(2) # kills worker with id 2 +Task (done) @0x00007ff2d66a5e40 + +julia> workers() +3-element Vector{Int64}: + 3 + 4 + 5 +``` + +As we have seen from the `htop/top` output, added processes start with specific cmd line arguments, however they are not shared with any aliases that we may have defined, e.g. `julia` ~ `julia --project=.`. Therefore in order to use an environment, we have to first activate it on all processes + +```julia +@everywhere begin + using Pkg; Pkg.activate(@__DIR__) # @__DIR__ equivalent to a call to pwd() +end +``` + +or we can load files containing this line on all processes with cmdline option `-L ###.jl` together with `-p ##`. 
+ +There are generally two ways of working with multiple processes + +- using low level functionality - we specify what/where is loaded, what/where is being run and when we fetch results + + `@everywhere` to run everywhere and wait for completion + + `@spawnat` and `remotecall` to run at specific process and return `Future` (a reference to a future result - remote reference) + + `fetch` - fetching remote reference + * `pmap` - for easily mapping a function over a collection + +- using high level functionality - define only simple functions and apply them on collections + + [`DistributedArrays`](https://github.com/JuliaParallel/DistributedArrays.jl)' with `DArray`s + + [`Transducers.jl`](https://github.com/JuliaFolds/Transducers.jl) pipelines + + [`Dagger.jl`](https://github.com/JuliaParallel/Dagger.jl) out-of-core and parallel computing + +### Sum with processes + +Writing your own sum of an array function is a good way to show all the potential problems, you may encounter with parallel programming. For comparison here is the naive version that uses `zero` for initialization and `@inbounds` for removing boundschecks. + +```julia +function naive_sum(a) + r = zero(eltype(a)) + for aᵢ in a + r += aᵢ + end + r +end +``` + +Its performance will serve us as a sequential baseline. + +```julia +julia> using BenchmarkTools + +julia> a = rand(10_000_000); # 10^7 + +julia> sum(a) ≈ naive_sum(a) +true + +julia> @btime sum($a) +5.011 ms (0 allocations: 0 bytes) + +julia> @btime naive_sum($a) +11.786 ms (0 allocations: 0 bytes) +``` + +Note that the built-in `sum` exploits single core parallelism with Single instruction, multiple data (SIMD instructions) and is thus faster. + +::: warning Exercise + +Write a distributed/multiprocessing version of `sum` function `dist_sum(a, np=nworkers())` without the help of `DistributedArrays`. Measure the speed up when doubling the number of workers (up to the number of logical cores - see [note](@ref lab10_thread) on hyper threading). 
+ +**HINTS**: +- map builtin `sum` over chunks of the array using `pmap` +- there are built in partition iterators `Iterators.partition(array, chunk_size)` +- `chunk_size` should relate to the number of available workers +- `pmap` has the option to pass the ids of workers as the second argument `pmap(f, WorkerPool([2,4]), collection)` +- `pmap` collects the partial results to the controller where it can be collected with another `sum` + +::: + +::: details Show solution + +```julia +using Distributed +addprocs(4) + +@everywhere begin + using Pkg; Pkg.activate(@__DIR__) +end + +function dist_sum(a, np=nworkers()) + chunk_size = div(length(a), np) + sum(pmap(sum, WorkerPool(workers()[1:np]), Iterators.partition(a, chunk_size))) +end + +dist_sum(a) ≈ sum(a) +@btime dist_sum($a) + +@time dist_sum(a, 1) # 74ms +@time dist_sum(a, 2) # 46ms +@time dist_sum(a, 4) # 49ms +@time dist_sum(a, 8) # 35ms +``` + +::: + +As you can see the built-in `pmap` already abstracts quite a lot from the process and all the data movement is handled internally, however in order to show off how we can abstract even more, let's use the `DistributedArrays.jl` pkg. + +::: warning Exercise + +Write a distributed/multiprocessing version of `sum` function `dist_sum_lib(a, np=nworkers())` with the help of `DistributedArrays`. Measure the speed up when doubling the number of workers (up to the number of logical cores - see note on hyper threading). + +**HINTS**: +- chunking and distributing the data can be handled for us using the `distribute` function on an array (creates a `DArray`) +- `distribute` has an option to specify on which workers should an array be distributed to +- `sum` function has a method for `DArray` +- remember to run `using DistributedArrays` on every process + +::: + +::: details Show solution + +Setting up. 
+ +```julia +using Distributed +addprocs(8) + +@everywhere begin + using Pkg; Pkg.activate(@__DIR__) +end + +@everywhere begin + using DistributedArrays +end +``` + +And the actual computation. + +```julia +adist = distribute(a) # distribute array to workers |> typeof - DArray +@time adist = distribute(a) # we should not disregard this time +@btime sum($adist) # call the built-in function (dispatch on DArrray) + +function dist_sum_lib(a, np=nworkers()) + adist = distribute(a, procs = workers()[1:np]) + sum(adist) +end + +dist_sum_lib(a) ≈ sum(a) +@btime dist_sum_lib($a) + +@time dist_sum_lib(a, 1) # 80ms +@time dist_sum_lib(a, 2) # 54ms +@time dist_sum_lib(a, 4) # 48ms +@time dist_sum_lib(a, 8) # 33ms +``` + +::: + +In both previous examples we have included the data transfer time from the controller process, in practice however distributed computing is used in situations where the data may be stored on individual local machines. As a general rule of thumb we should always send only instruction what to do and not the actual data to be processed. This will be more clearly demonstrated in the next more practical example. + +### [Distributed file processing](@id lab10_dist_file_p) + +`Distributed` is often used in processing of files, such as the commonly encountered `mapreduce` jobs with technologies like [`Hadoop`](https://hadoop.apache.org/), [`Spark`](http://spark.apache.org/), where the files live on a distributed file system and a typical job requires us to map over all the files and gather some statistics such as histograms, sums and others. We will simulate this situation with the Julia's pkg codebase, which on a typical user installation can contain up to hundreds of thousand of `.jl` files (depending on how extensively one uses Julia). + +::: warning Exercise + +Write a distributed pipeline for computing a histogram of symbols found in AST by parsing Julia source files in your `.julia/packages/` directory. 
We have already implemented most of the code that you will need (available as source code [here](https://github.com/JuliaTeachingCTU/Scientific-Programming-in-Julia/blob/master/docs/src/lecture_10/pkg_processing.jl)). + +Your task is to write a function that does the `map` and `reduce` steps, that will create and gather the dictionaries from different workers. There are two ways to do a map +- either over directories inside `.julia/packages/` - call it `distributed_histogram_pkgwise` +- or over all files obtained by concatenation of `filter_jl` outputs (*NOTE* that this might not be possible if the listing itself is expensive - speed or memory requirements) - call it `distributed_histogram_filewise` +Measure if the speed up scales linearly with the number of processes by restricting the number of workers inside a `pmap`. + +**HINTS**: +- for each file path apply `tokenize` to extract symbols and follow it with the update of a local histogram +- try writing sequential version first +- either load `./pkg_processing.jl` on startup with `-L` and `-p` options or `include("./pkg_processing.jl")` inside `@everywhere` +- use `pmap` to easily iterate in parallel over a collection - the result should be an array of histogram, which has to be merged on the controller node (use builtin `mergewith!` function in conjunction with `reduce`) +- `pmap` supports `do` syntax + +```julia +pmap(collection) do item + do_something(item) +end +``` +- pkg directory can be obtained with `joinpath(DEPOT_PATH[1], "packages")` + +**BONUS**: What is the most frequent symbol in your codebase? + +::: + +::: details Show `pkg_processing.jl` + +```@eval +using Markdown + +Markdown.parse(string("```julia\n", readchomp("./pkg_processing.jl"), "\n```")) +``` + +::: + +::: details Show solution + +Let's implement first a sequential version as it is much easier to debug. 
+ +```julia +include("./pkg_processing.jl") + +using ProgressMeter +function sequential_histogram(path) + h = Dict{Symbol, Int}() + @showprogress for pkg_dir in sample_all_installed_pkgs(path) + for jl_path in filter_jl(pkg_dir) + syms = tokenize(jl_path) + for s in syms + v = get!(h, s, 0) + h[s] += 1 + end + end + end + h +end +path = joinpath(DEPOT_PATH[1], "packages") # usually the first entry +@time h = sequential_histogram(path) # 87s +``` + +First we try to distribute over package folders. **TODO** add the ability to run it only on some workers + +```julia +using Distributed +addprocs(8) + +@everywhere begin + using Pkg; Pkg.activate(@__DIR__) + # we have to realize that the code that workers have access to functions we have defined + include("./pkg_processing.jl") +end + +""" + merge_with!(h1, h2) + +Merges count dictionary `h2` into `h1` by adding the counts. Equivalent to `Base.mergewith!(+)`. +""" +function merge_with!(h1, h2) + for s in keys(h2) + get!(h1, s, 0) + h1[s] += h2[s] + end + h1 +end + +using ProgressMeter + +function distributed_histogram_pkgwise(path, np=nworkers()) + r = @showprogress pmap(WorkerPool(workers()[1:np]), sample_all_installed_pkgs(path)) do pkg_dir + h = Dict{Symbol, Int}() + for jl_path in filter_jl(pkg_dir) + syms = tokenize(jl_path) + for s in syms + v = get!(h, s, 0) + h[s] += 1 + end + end + h + end + reduce(merge_with!, r) +end +path = joinpath(DEPOT_PATH[1], "packages") + +@time h = distributed_histogram_pkgwise(path, 2) # 41.5s +@time h = distributed_histogram_pkgwise(path, 4) # 24.0s +@time h = distributed_histogram_pkgwise(path, 8) # 24.0s +``` + +Second we try to distribute over all files. 
+ +```julia +function distributed_histogram_filewise(path, np=nworkers()) + jl_files = reduce(vcat, filter_jl(pkg_dir) for pkg_dir in sample_all_installed_pkgs(path)) + r = @showprogress pmap(WorkerPool(workers()[1:np]), jl_files) do jl_path + h = Dict{Symbol, Int}() + syms = tokenize(jl_path) + for s in syms + v = get!(h, s, 0) + h[s] += 1 + end + h + end + reduce(merge_with!, r) +end + +path = joinpath(DEPOT_PATH[1], "packages") +@time h = distributed_histogram_filewise(path, 2) # 46.9s +@time h = distributed_histogram_filewise(path, 4) # 24.8s +@time h = distributed_histogram_filewise(path, 8) # 20.4s +``` + +Here we can see that we have improved the timings a bit by increasing granularity of tasks. + +**BONUS**: You can do some analysis with `DataFrames` + +```julia +using DataFrames +df = DataFrame(:sym => collect(keys(h)), :count => collect(values(h))); +sort!(df, :count, rev=true); +df[1:50,:] +``` + +::: + + +## [Threading](@id lab10_thread) + +The number of threads for a Julia process can be set up in an environmental variable `JULIA_NUM_THREADS` or directly on Julia startup with cmd line option `-t ##` or `--threads ##`. If both are specified the latter takes precedence. + +```bash +julia -t 8 +``` + +In order to find out how many threads are currently available, there exists the `nthreads` function inside `Base.Threads` library. There is also an analog to the Distributed `myid` example, called `threadid`. + +```julia +julia> using Base.Threads + +julia> nthreads() +8 + +julia> threadid() +1 +``` + +As opposed to distributed/multiprocessing programming, threads have access to the whole memory of Julia's process, therefore we don't have to deal with separate environment manipulation, code loading and data transfers. However we have to be aware of the fact that memory can be modified from two different places and that there may be some performance penalties of accessing memory that is physically further from a given core (e.g. 
caches of different core or different NUMA[^3] nodes). Another significant difference from distributed computing is that we cannot spawn additional threads on the fly in the same way that we have been able to do with `addprocs` function. + +[^3]: NUMA - [https://en.wikipedia.org/wiki/Non-uniform\_memory\_access](https://en.wikipedia.org/wiki/Non-uniform_memory_access) + +::: tip Hyper threads + +In most of today's CPUs the number of threads is larger than the number of physical cores. These additional threads are usually called hyper threads[^4] or when talking about cores - logical cores. The technology relies on the fact, that for a given "instruction" there may be underutilized parts of the CPU core's machinery (such as one of many arithmetic units) and if a suitable work/instruction comes in it can be run simultaneously. In practice this means that adding more threads than physical cores may not be accompanied with the expected speed up. + +::: + +[^4]: Hyperthreading - [https://en.wikipedia.org/wiki/Hyper-threading](https://en.wikipedia.org/wiki/Hyper-threading) + +The easiest (not always yielding the correct result) way how to turn a code into multi threaded code is putting the `@threads` macro in front of a for loop, which instructs Julia to run the body on separate threads. + +```julia +julia> A = Array{Union{Int,Missing}}(missing, nthreads()); + +julia> for i in 1:nthreads() + A[threadid()] = threadid() +end + +julia> A # only the first element is filled +8-element Vector{Union{Missing, Int64}}: + 1 + missing + missing + missing + missing + missing + missing + missing +``` + +```julia +julia> A = Array{Union{Int,Missing}}(missing, nthreads()); + +julia> @threads for i in 1:nthreads() + A[threadid()] = threadid() +end + +julia> A # the expected results +8-element Vector{Union{Missing, Int64}}: + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 +``` + +### Multithreaded sum + +Armed with this knowledge let's tackle the problem of the simple `sum`. 
+ +```julia +function threaded_sum_naive(a) + r = zero(eltype(a)) + @threads for i in eachindex(a) + @inbounds r += a[i] + end + return r +end +``` + +Comparing this with the built-in sum we see not an insignificant discrepancy (one that cannot be explained by reordering of computation) and moreover the timings show us some ridiculous overhead. + +```julia +julia> using BenchmarkTools + +julia> a = rand(10_000_000); # 10^7 + +julia> sum(a), threaded_sum_naive(a) +(5.000577175855193e6, 625888.2270955174) + +julia> @btime sum($a) + 4.861 ms (0 allocations: 0 bytes) + +julia> @btime threaded_sum_naive($a) + 163.379 ms (20000042 allocations: 305.18 MiB) +``` + +Recalling what has been said above we have to be aware of the fact that the data can be accessed from multiple threads at once, which if not taken into an account means that each thread reads possibly outdated value and overwrites it with its own updated state. + +There are two solutions which we will tackle in the next two exercises. + +::: warning Exercise + +Implement `threaded_sum_atom`, which uses `Atomic` wrapper around the accumulator variable `r` in order to ensure correct locking of data access. + +**HINTS**: +- use `atomic_add!` as a replacement of `r += A[i]` +- "collect" the result by dereferencing variable `r` with empty bracket operator `[]` + +**BONUS**: Try chunking the array and calling sum on individual chunks to obtain some real speedup. + +::: + +::: tip Side note on dereferencing + +In Julia we can create references to a data types, which are guarranteed to point to correct and allocated type in memory, as long as a reference exists the memory is not garbage collected. These are constructed with `Ref(x)`, `Ref(a, 7)` or `Ref{T}()` for reference to variable `x`, `7`th element of array `a` and an empty reference respectively. Dereferencing aka asking about the underlying value is done using empty bracket operator `[]`. 
+ +```@repl lab10_refs +x = 1 # integer +rx = Ref(x) # reference to that particular integer `x` +x == rx[] # dereferencing yields the same value +``` + +There also exist unsafe references/pointers `Ptr`, however we should not really come into a contact with those. + +::: + +::: details Show solution + +```julia +function threaded_sum_atom(a) + r = Atomic{eltype(a)}(zero(eltype(a))) + @threads for i in eachindex(a) + @inbounds atomic_add!(r, a[i]) + end + return r[] +end +``` + +```julia +julia> sum(a) ≈ threaded_sum_atom(a) +true + +julia> @btime threaded_sum_atom($a) + 661.502 ms (42 allocations: 3.66 KiB) +``` + +That's better but far from the performance we need. + +**BONUS**: There is a fancier and faster way to do this by chunking the array + +```julia +function threaded_sum_fancy_atom(a) + r = Atomic{eltype(a)}(zero(eltype(a))) + len, rem = divrem(length(a), nthreads()) + @threads for t in 1:nthreads() + rₜ = zero(eltype(a)) + @simd for i in (1:len) .+ (t-1)*len + @inbounds rₜ += a[i] + end + atomic_add!(r, rₜ) + end + # catch up any stragglers + result = r[] + @simd for i in length(a)-rem+1:length(a) + @inbounds result += a[i] + end + return result +end +``` + +```julia +julia> sum(a) ≈ threaded_sum_fancy_atom(a) +true +julia> @btime threaded_sum_fancy_atom($a) + 2.983 ms (42 allocations: 3.67 KiB) +``` + +Finally we have beaten the "sequential" sum. The quotes are intentional, because the `Base`'s implementation of a sum uses Single instruction, multiple data (SIMD) instructions as well, which allow to process multiple elements at once. + +::: + +::: warning Exercise + +Implement `threaded_sum_buffer`, which uses an array of length `nthreads()` (we will call this buffer) for local aggregation of results of individual threads. 
+ +**HINTS**: +- use `threadid()` to index the buffer array +- sum the buffer array to obtain final result + +::: + +::: details Show solution + +```julia +function threaded_sum_buffer(a) + R = zeros(eltype(a), nthreads()) + @threads for i in eachindex(a) + @inbounds R[threadid()] += a[i] + end + r = zero(eltype(a)) + # sum the partial results from each thread + for i in eachindex(R) + @inbounds r += R[i] + end + return r +end +``` + +```julia +julia> sum(a) ≈ threaded_sum_buffer(a) +true +julia> @btime threaded_sum_buffer($a) + 2.750 ms (42 allocations: 3.78 KiB) +``` + +Though this implementation is cleaner and faster, there is possible drawback with this implementation, as the buffer `R` lives in a continuous part of the memory and each thread that accesses it brings it to its caches as a whole, thus invalidating the values for the other threads, which it in the same way. + +::: + +Seeing how multithreading works on a simple example, let's apply it on the "more practical" case of the Symbol histogram from exercise [above](@ref lab10_dist_file_p). + +### [Multithreaded file processing](@id lab10_dist_file_t) + +::: warning Exercise + +Write a multithreaded analog of the file processing pipeline from [exercise](@ref lab10_dist_file_p) above. Again the task is to write the `map` and `reduce` steps, that will create and gather the dictionaries from different workers. There are two ways to map + +- either over directories inside `.julia/packages/` - `threaded_histogram_pkgwise` +- or over all files obtained by concatenation of `filter_jl` outputs - `threaded_histogram_filewise` +Compare the speedup with the version using process based parallelism. + +**HINTS**: +- create a separate dictionary for each thread in order to avoid the need for atomic operations + +**BONUS**: In each of the cases count how many files/pkgs each thread processed. Would the dynamic scheduler help us in this situation? + +::: + +::: details Show solution + +Setup is now much simpler. 
+ +```julia +using Base.Threads +include("./pkg_processing.jl") +path = joinpath(DEPOT_PATH[1], "packages") +``` + +Firstly the version with folder-wise parallelism. + +```julia +function threaded_histogram_pkgwise(path) + ht = [Dict{Symbol, Int}() for _ in 1:nthreads()] + @threads for pkg_dir in sample_all_installed_pkgs(path) + h = ht[threadid()] + for jl_path in filter_jl(pkg_dir) + syms = tokenize(jl_path) + for s in syms + v = get!(h, s, 0) + h[s] += 1 + end + end + end + reduce(mergewith!(+), ht) +end +``` + +```julia +julia> @time h = threaded_histogram_pkgwise(path) + 26.958786 seconds (81.69 M allocations: 10.384 GiB, 4.58% gc time) +``` + +Secondly the version with file-wise parallelism. + +```julia +function threaded_histogram_filewise(path) + jl_files = reduce(vcat, filter_jl(pkg_dir) for pkg_dir in sample_all_installed_pkgs(path)) + ht = [Dict{Symbol, Int}() for _ in 1:nthreads()] + @threads for jl_path in jl_files + h = ht[threadid()] + syms = tokenize(jl_path) + for s in syms + v = get!(h, s, 0) + h[s] += 1 + end + end + reduce(mergewith!(+), ht) +end +``` + +```julia +julia> @time h = threaded_histogram_filewise(path) + 29.677184 seconds (81.66 M allocations: 10.411 GiB, 4.13% gc time) +``` + +::: + +## Task switching + +There is a way how to run "multiple" things at once, which does not necessarily involve either threads or processes. In Julia this concept is called task switching or asynchronous programming, where we fire off our requests in a short time and let the cpu/os/network handle the distribution. As an example which we will try today is querying a web API, which has some variable latency. In the usuall sequantial fashion we can always post queries one at a time, however generally the APIs can handle multiple request at a time, therefore in order to better utilize them, we can call them asynchronously and fetch all results later, in some cases this will be faster. 
+ +::: tip Burst requests + +It is a good practice to check if an API supports some sort of batch request, because making a burst of single requests might lead to a worse performance for others and a possible blocking of your IP/API key. + +::: + +Consider the following functions + +```julia +function a() + for i in 1:10 + sleep(1) + end +end + +function b() + for i in 1:10 + @async sleep(1) + end +end + +function c() + @sync for i in 1:10 + @async sleep(1) + end +end +``` + +How much time will the execution of each of them take? + +::: details Show solution + +```julia +@time a() # 10s +@time b() # ~0s +@time c() # >~1s +``` + +::: + +::: warning Exercise + +Choose one of the free web APIs and query its endpoint using the `HTTP.jl` library. Implement both a sequential and an asynchronous version. Compare them on a burst of 10 requests. + +**HINTS**: +- use `HTTP.request` for `GET` requests on your chosen API, e.g. `r = HTTP.request("GET", "https://catfact.ninja/fact")` for random cat fact +- converting body of a response can be done simply by constructing a `String` out of it - `String(r.body)` +- in order to parse a json string use `JSON.jl`'s parse function +- Julia offers `asyncmap` - asynchronous `map` + +::: + +::: details Show solution + +```julia +using HTTP, JSON + +function query_cat_fact() + r = HTTP.request("GET", "https://catfact.ninja/fact") + j = String(r.body) + d = JSON.parse(j) + d["fact"] +end + +# without asyncmap +function get_cat_facts_async(n) + facts = Vector{String}(undef, n) + @sync for i in 1:n + @async facts[i] = query_cat_fact() + end + facts +end + +get_cat_facts_async(n) = asyncmap(x -> query_cat_fact(), Base.OneTo(n)) +get_cat_facts(n) = map(x -> query_cat_fact(), Base.OneTo(n)) + +@time get_cat_facts_async(10) # ~0.15s +@time get_cat_facts(10) # ~1.1s +``` + +::: + +# Resources +- parallel computing [course](https://juliacomputing.com/resources/webinars/) by Julia Computing \ No newline at end of file diff --git 
a/docs_vitepress/src/lectures/lecture_10/lecture.md b/docs_vitepress/src/lectures/lecture_10/lecture.md new file mode 100644 index 00000000..664ff1d2 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_10/lecture.md @@ -0,0 +1,920 @@ +# Parallel programming with Julia + +Julia offers different levels of parallel programming + +- distributed processing, where jobs are split among different Julia processes +- multi-threading, where jobs are split among multiple threads within the same process +- SIMD instructions +- Task switching. + +In this lecture, we will focus mainly on the first two, since SIMD instructions are mainly used for low-level optimization (such as writing your own very performant BLAS library), and task switching is not true parallelism, but allows running a different task when one task is waiting, for example, for IO. + +**The most important lesson is that before you jump into the parallelism, be certain you have made your sequential code as fast as possible.** + +## Process-level parallelism + +Process-level parallelism means we run several instances of Julia (in different processes) and they communicate between each other using inter-process communication (IPC). The implementation of IPC differs if parallel Julia instances share the same machine, or they are on different machines spread over the network. By default, different processes *do not share any libraries or any variables*. They are loaded clean and it is up to the user to set up all needed code and data. + +Julia's default modus operandi is a single *main* instance controlling several workers. This main instance has `myid() == 1`, worker processes receive higher numbers. Julia can be started with multiple workers from the very beginning, using `-p` switch as + +```julia +julia -p n +``` + +where `n` is the number of workers, or you can add workers after Julia has been started by + +```julia +using Distributed +addprocs(n) +``` + +You can also remove workers using `rmprocs`. 
When Julia is started with `-p`, `Distributed` library is loaded by default on main worker. Workers can be on the same physical machines, or on different machines. Julia offers integration via `ClusterManagers.jl` with most scheduling systems. + +If you want to evaluate a piece of code on all workers including the main process, a convenience macro `@everywhere` is offered. + +```julia +@everywhere @show myid() +``` + +As we have mentioned, workers are loaded without libraries. We can see that by running + +```julia +@everywhere InteractiveUtils.varinfo() +``` + +which fails, but after loading `InteractiveUtils` everywhere + +```julia +using Statistics +@everywhere begin + using InteractiveUtils + println(InteractiveUtils.varinfo(;imported = true)) +end +``` + +we see that `Statistics` was loaded only on the main process. Thus, there is no magical sharing of data and code. With `@everywhere` macro we can define functions and variables, and import libraries on workers as + +```julia +@everywhere begin + foo(x, y) = x * y + sin(y) + foo(x) = foo(x, myid()) + x = rand() +end + +@everywhere @show foo(1.0) +@everywhere @show x +``` + +The fact that `x` has different values on different workers and master demonstrates again the independence of processes. While we can set up everything using `@everywhere` macro, we can also put all the code for workers into a separate file, e.g. `worker.jl` and load it on all workers using `-L worker.jl`. + +Julia's multi-processing model is based on message-passing paradigm, but the abstraction is more akin to procedure calls. This means that users are saved from prepending messages with headers and implementing logic deciding which function should be called for which header. Instead, we can *schedule* an execution of a function on a remote worker and return the control immediately to continue in our job. A low-level function providing this functionality is `remotecall(fun, worker_id, args...)`. 
For example + +```julia +@everywhere begin + function delayed_foo(x, y, n ) + sleep(n) + println("woke up") + foo(x, y) + end +end +r = remotecall(delayed_foo, 2, 1, 1, 60) +``` + +returns immediately, even though the function will take at least 60 seconds. `r` does not contain result of `foo(1, 1)`, but a struct `Future`, which is a *remote reference* in Julia's terminology. It points to data located on some machine, indicates, if they are available and allows to `fetch` them from the remote worker. `fetch` is blocking, which means that the execution is blocked until data are available (if they are never available, the process can wait forever.) The presence of data can be checked using `isready`, which in case of `Future` returned from `remote_call` indicates that the computation has finished. + +```julia +isready(r) +fetch(r) == foo(1, 1) +``` + +An advantage of the remote reference is that it can be freely shared around processes and the result can be retrieved on a different node than the one which issued the call. + +```julia +r = remotecall(delayed_foo, 2, 1, 1, 60) +remotecall(r -> println("value: ",fetch(r), " retrieved on ", myid()) , 3, r) +``` + +An interesting feature of `fetch` is that it re-throws an exception raised on a different process. + +```julia +@everywhere begin + function exfoo() + throw("Exception from $(myid())") + end +end +r = @spawnat 2 exfoo() +``` + +where we have used `@spawnat` instead of `remote_call`. It is a higher-level alternative executing a closure around the expression (in this case `exfoo()`) on a specified worker, in this case 2. Coming back to the example, when we fetch the result `r`, the exception is thrown on the main process, not on the worker + +```julia +fetch(r) +``` + +`@spawnat` can be executed with `:any` to signal that the user does not care where the function will be executed and it will be left up to Julia. 
+ +```julia +r = @spawnat :any foo(1,1) +fetch(r) +``` + +Finally, if you would for some reason need to wait for the computed value, you can use + +```julia +remotecall_fetch(foo, 2, 1, 1) +``` + +## Running example: Julia sets + +Our example for explaining mechanisms of distributed computing will be Julia set fractals, as they can be easily paralelized. The example is adapted from [Eric Aubanel](http://www.cs.unb.ca/~aubanel/JuliaMultithreadingNotes.html). Some fractals (Julia set, Mandelbrot) are determined by properties of some complex-valued functions. Julia set counts, how many iteration is required for ``f(z) = z^2+c`` to be bigger than two in absolute value, ``|f(z)| >=2 ``. The number of iterations can then be mapped to the pixel's color, which creates a nice visualization we know. + +```julia +function juliaset_pixel(z₀, c) + z = z₀ + for i in 1:255 + abs2(z)> 4.0 && return (i - 1)%UInt8 + z = z*z + c + end + return UInt8(255) +end +``` + +A nice property of fractals like Julia set is that the computation can be easily paralelized, since the value of each pixel is independent from the remaining. In our experiments, the level of granulity will be one column, since calculation of single pixel is so fast, that thread or process switching will have much higher overhead. 
+ +```julia +function juliaset_column!(img, c, n, j) + x = -2.0 + (j-1)*4.0/(n-1) + for i in 1:n + y = -2.0 + (i-1)*4.0/(n-1) + @inbounds img[i,j] = juliaset_pixel(x+im*y, c) + end + nothing +end +``` + +To calculate full image + +```julia +function juliaset(x, y, n=1000) + c = x + y*im + img = Array{UInt8,2}(undef,n,n) + for j in 1:n + juliaset_column!(img, c, n, j) + end + return img +end +``` + +and run it and view it + +```julia +using Plots + +frac = juliaset(-0.79, 0.15) +plot(heatmap(1:size(frac,1),1:size(frac,2), frac, color=:Spectral)) +``` + +or with GLMakie + +```julia +using GLMakie + +frac = juliaset(-0.79, 0.15) +heatmap(frac) +``` + +To observe the execution length, we will use `BenchmarkTools.jl` + +```julia +julia> using BenchmarkTools + +julia> @btime juliaset(-0.79, 0.15); + 39.822 ms (2 allocations: 976.70 KiB) +``` + +Let's now try to speed-up the computation using more processes. We first make functions available to workers + +```julia +@everywhere begin + function juliaset_pixel(z₀, c) + z = z₀ + for i in 1:255 + abs2(z)> 4.0 && return (i - 1)%UInt8 + z = z*z + c + end + return UInt8(255) + end + + function juliaset_column!(img, c, n, colj, j) + x = -2.0 + (j-1)*4.0/(n-1) + for i in 1:n + y = -2.0 + (i-1)*4.0/(n-1) + @inbounds img[i,colj] = juliaset_pixel(x+im*y, c) + end + nothing + end +end +``` + +For the actual parallelisation, we split the computation of the whole image into bands, such that each worker computes a smaller portion. 
+ +```julia +@everywhere begin + function juliaset_columns(c, n, columns) + img = Array{UInt8,2}(undef, n, length(columns)) + for (colj, j) in enumerate(columns) + juliaset_column!(img, c, n, colj, j) + end + img + end +end + +function juliaset_spawn(x, y, n = 1000) + c = x + y*im + columns = Iterators.partition(1:n, div(n, nworkers())) + r_bands = [@spawnat w juliaset_columns(c, n, cols) for (w, cols) in enumerate(columns)] + slices = map(fetch, r_bands) + reduce(hcat, slices) +end +``` + +we observe some speed-up over the serial version, but not linear in terms of number of workers + +```julia +julia> @btime juliaset(-0.79, 0.15); + 38.699 ms (2 allocations: 976.70 KiB) + +julia> @btime juliaset_spawn(-0.79, 0.15); + 21.521 ms (480 allocations: 1.93 MiB) +``` + +In the above example, we spawn one function on each worker and collect the results. In essence, we are performing `map` over bands. Julia offers for this usecase a parallel version of map `pmap`. With that, our example can look like + +```julia +function juliaset_pmap(x, y, n = 1000, np = nworkers()) + c = x + y*im + columns = Iterators.partition(1:n, div(n, np)) + slices = pmap(cols -> juliaset_columns(c, n, cols), columns) + reduce(hcat, slices) +end + +julia> @btime juliaset_pmap(-0.79, 0.15); + 17.597 ms (451 allocations: 1.93 MiB) +``` + +which has slightly better timing then the version based on `@spawnat` and `fetch` (as explained below in section about `Threads`, the parallel computation of Julia set suffers from each pixel taking different time to compute, which can be relieved by dividing the work into more parts: + +```julia +julia> @btime juliaset_pmap(-0.79, 0.15, 1000, 16); + 12.686 ms (1439 allocations: 1.96 MiB) +``` + +## Shared memory + +When main and all workers are located on the same process, and the OS supports sharing memory between processes (by sharing memory pages), we can use `SharedArrays` to avoid sending the matrix with results. 
+ +```julia +@everywhere begin + using SharedArrays + function juliaset_shared(x, y, n=1000) + c = x + y*im + img = SharedArray(Array{UInt8,2}(undef,n,n)) + @sync @distributed for j in 1:n + juliaset_column!(img, c, n, j, j) + end + return img + end +end + +julia> @btime juliaset_shared(-0.79, 0.15); + 19.088 ms (963 allocations: 1017.92 KiB) +``` + +The allocation of the Shared Array mich be costly, let's try to put the allocation outside of the loop + +```julia +img = SharedArray(Array{UInt8,2}(undef,1000,1000)) + +function juliaset_shared!(img, x, y, n=1000) + c = x + y*im + @sync @distributed for j in 1:n + juliaset_column!(img, c, n, j, j) + end + return img +end +``` + +```julia +julia> @btime juliaset_shared!(img, -0.79, 0.15); + 17.399 ms (614 allocations: 27.61 KiB) +``` + +but both versions are not akin. It seems like the alocation of `SharedArray` costs approximately `2`ms. + +`@distributed for` (`Distributed.pfor`) does not allows to supply, as it splits the for cycle to `nworkers()` processes. Above we have seen that more splits is better + +```julia +@everywhere begin + function juliaset_columns!(img, c, n, columns) + for (colj, j) in enumerate(columns) + juliaset_column!(img, c, n, colj, j) + end + end +end + +img = SharedArray(Array{UInt8,2}(undef,1000,1000)) +function juliaset_shared!(img, x, y, n=1000, np = nworkers()) + c = x + y*im + columns = Iterators.partition(1:n, div(n, np)) + pmap(cols -> juliaset_columns!(img, c, n, cols), columns) + return img +end +``` + +```julia +julia> @btime juliaset_shared!(img, -0.79, 0.15, 1000, 16); + 11.760 ms (1710 allocations: 85.98 KiB) +``` + +Which is almost 1ms faster than without used of pre-allocated `SharedArray`. Notice the speedup is now `38.699 / 11.76 = 3.29×` + +## Synchronization / Communication primitives + +The orchestration of a complicated computation might be difficult with relatively low-level remote calls. A *producer / consumer* paradigm is a synchronization paradigm that uses queues. 
Consumer fetches work instructions from the queue and pushes results to a different queue. Julia supports this paradigm with `Channel` and `RemoteChannel` primitives. Importantly, putting to and taking from a queue is an atomic operation, hence we do not have to take care of race conditions. +The code for the worker might look like + +```julia +@everywhere begin + function juliaset_channel_worker(instructions, results) + while isready(instructions) + c, n, cols = take!(instructions) + put!(results, (cols, juliaset_columns(c, n, cols))) + end + end +end +``` + +The code for the main process will look like + +```julia +function juliaset_channels(x, y, n = 1000, np = nworkers()) + c = x + y*im + columns = Iterators.partition(1:n, div(n, np)) + instructions = RemoteChannel(() -> Channel(np)) + foreach(cols -> put!(instructions, (c, n, cols)), columns) + results = RemoteChannel(()->Channel(np)) + rfuns = [@spawnat i juliaset_channel_worker(instructions, results) for i in workers()] + + img = Array{UInt8,2}(undef, n, n) + for i in 1:np + cols, impart = take!(results) + img[:,cols] .= impart; + end + img +end +``` + +```julia +julia> @btime juliaset_channels(-0.79, 0.15); +``` + +The execution time is much higher than what we have observed in the previous cases and changing the number of workers does not help much. What went wrong? The reason is that setting up the infrastructure around remote channels is a costly process.
Consider the following alternative, where (i) we let workers to run endlessly and (ii) the channel infrastructure is set-up once and wrapped into an anonymous function + +```julia +@everywhere begin + function juliaset_channel_worker(instructions, results) + while true + c, n, cols = take!(instructions) + put!(results, (cols, juliaset_columns(c, n, cols))) + end + end +end + +function juliaset_init(x, y, n = 1000, np = nworkers()) + c = x + y*im + columns = Iterators.partition(1:n, div(n, np)) + T = Tuple{ComplexF64,Int64,UnitRange{Int64}} + instructions = RemoteChannel(() -> Channel{T}(np)) + T = Tuple{UnitRange{Int64},Array{UInt8,2}} + results = RemoteChannel(()->Channel{T}(np)) + foreach(p -> remote_do(juliaset_channel_worker, p, instructions, results), workers()) + function compute() + img = Array{UInt8,2}(undef, n, n) + foreach(cols -> put!(instructions, (c, n, cols)), columns) + for i in 1:np + cols, impart = take!(results) + img[:,cols] .= impart; + end + img + end +end +``` + +```julia +t = juliaset_init(-0.79, 0.15) +julia> @btime t(); + 17.697 ms (776 allocations: 1.94 MiB) +``` + +with which we obtain the comparable speed to the `pmap` approach. + +::: tip `remote_do` vs `remote_call` + +Instead of `@spawnat` (`remote_call`) we can also use `remote_do` as foreach`(p -> remote_do(juliaset_channel_worker, p, instructions, results), workers)`, which executes the function `juliaset_channel_worker` at worker `p` with parameters `instructions` and `results` but does not return `Future` handle to receive the future results. + +::: + +::: tip `Channel` and `RemoteChannel` + +`AbstractChannel` has to implement the interface `put!`, `take!`, `fetch`, `isready` and `wait`, i.e. it should behave like a queue. `Channel` is an implementation if an `AbstractChannel` that facilitates a communication within a single process (for the purpose of multi-threadding and task switching). Channel can be easily created by `Channel{T}(capacity)`, which can be infinite. 
The storage of a channel can be seen in `data` field, but a direct access will of course break all guarantees like atomicity of `take!` and `put!`. For communication between proccesses, the `<:AbstractChannel` has to be wrapped in `RemoteChannel`. The constructor for `RemoteChannel(f::Function, pid::Integer=myid())` has a first argument a function (without arguments) which constructs the `Channel` (or something like that) on the remote machine identified by `pid` and returns the `RemoteChannel`. The storage thus resides on the machine specified by `pid` and the handle provided by the `RemoteChannel` can be freely passed to any process. (For curious, `ProcessGroup` `Distributed.PGRP` contains an information about channels on machines.) + +::: + +In the above example, `juliaset_channel_worker` defined as + +```julia +function juliaset_channel_worker(instructions, results) + while true + c, n, cols = take!(instructions) + put!(results, (cols, juliaset_columns(c, n, cols))) + end +end +``` + +runs forever due to the `while true` loop. + +Julia does not provide by default any facility to kill the remote execution except sending `ctrl-c` to the remote worker as `interrupt(pids::Integer...)`. To stop the computation, we usually extend the type accepted by the `instructions` channel to accept some stopping token (e.g. :stop) and stop. 
+ +```julia +@everywhere begin + function juliaset_channel_worker(instructions, results) + while true + i = take!(instructions) + i === :stop && break + c, n, cols = i + put!(results, (cols, juliaset_columns(c, n, cols))) + end + println("worker $(myid()) stopped") + put!(results, :stop) + end +end + +function juliaset_init(x, y, n = 1000, np = nworkers()) + c = x + y*im + columns = Iterators.partition(1:n, div(n, np)) + instructions = RemoteChannel(() -> Channel(np)) + results = RemoteChannel(()->Channel(np)) + foreach(p -> remote_do(juliaset_channel_worker, p, instructions, results), workers()) + function compute() + img = Array{UInt8,2}(undef, n, n) + foreach(cols -> put!(instructions, (c, n, cols)), columns) + for i in 1:np + cols, impart = take!(results) + img[:,cols] .= impart; + end + img + end +end + +t = juliaset_init(-0.79, 0.15) +t() +foreach(i -> put!(t.instructions, :stop), workers()) +``` + +In the above example we paid the price of introducing type instability into the channels, which now contain types `Any` instead of carefully constructed tuples. But the impact on the overall running time is negligible + +```julia +julia> t = juliaset_init(-0.79, 0.15) + +julia> @btime t() + 17.551 ms (774 allocations: 1.94 MiB) + +julia> foreach(i -> put!(t.instructions, :stop), workers()) +``` + +In some use-cases, the alternative can be to put all jobs to the `RemoteChannel` before workers are started, and then stop the workers when the remote channel is empty as + +```julia +@everywhere begin + function juliaset_channel_worker(instructions, results) + while !isready(instructions) + c, n, cols = take!(instructions) + put!(results, (cols, juliaset_columns(c, n, cols))) + end + end +end +``` + +## Sending data + +Sending parameters of functions and receiving results from a remotely called functions migh incur a significant cost. + +1. Try to minimize the data movement as much as possible. 
A prototypical example is + ```julia + A = rand(1000,1000); + Bref = @spawnat :any A^2; + ``` + + and + + ```julia + Bref = @spawnat :any rand(1000,1000)^2; + ``` + +2. It is not only volume of data (in terms of the number of bytes), but also a complexity of objects that are being sent. Serialization can be very time consuming, an efficient converstion to something simple might be worth + +```julia +using BenchmarkTools + +@everywhere begin + using Random + v = [randstring(rand(1:20)) for i in 1:1000]; + p = [i => v[i] for i in 1:1000] + d = Dict(p) + + send_vec() = v + send_dict() = d + send_pairs() = p + custom_serialization() = (length.(v), join(v, "")) +end + +@btime remotecall_fetch(send_vec, 2); +@btime remotecall_fetch(send_dict, 2); +@btime remotecall_fetch(send_pairs, 2); +@btime remotecall_fetch(custom_serialization, 2); +``` + +3. Some type of objects cannot be properly serialized and deserialized + +```julia +a = IdDict( + :a => rand(1,1), + ) +b = remotecall_fetch(identity, 2, a) +a[:a] === a[:a] +a[:a] === b[:a] +``` + +4. If you need to send the data to worker, i.e. you want to define (overwrite) a global variable there + +```julia +@everywhere begin + g = rand() + show_secret() = println("secret of ", myid(), " is ", g) +end +@everywhere show_secret() + +for i in workers() + remotecall_fetch(g -> eval(:(g = $(g))), i, g) +end +@everywhere show_secret() +``` + +which is implemented in the `ParallelDataTransfer.jl` with other variants, but in general, this construct should be avoided. + +Alternatively, you can overwrite a global variable +```julia +@everywhere begin + g = rand() + show_secret() = println("secret of ", myid(), " is ", g) + function set_g(x) + global g + g = x + nothing + end +end + +@everywhere show_secret() +remote_do(set_g, 2, 2) +@everywhere show_secret() +``` + +## Practical advices + +Recall that (i) workers are started as clean processes and (ii) they might not share the same environment with the main process. 
The latter is due to the possibility of remote machines to have a different directory structure. + +```julia +@everywhere begin + using Pkg + println(Pkg.project().path) +end +``` + +Our advices earned by practice are: + +- to have shared directory (shared home) with code and to share the location of packages +- to place all code for workers to one file, let's call it `worker.jl` (author of this includes the code for master as well). +- put to the beggining of `worker.jl` code activating specified environment as (or specify environmnet for all workers in environment variable as `export JULIA_PROJECT="$PWD"`) + +```julia +using Pkg +Pkg.activate(@__DIR__) +``` + +and optionally + +```julia +Pkg.resolve() +Pkg.instantiate() +``` + +- run julia as + +```julia +julia -p ?? -L worker.jl main.jl +``` + +where `main.jl` is the script to be executed on the main node. Or + +```julia +julia -p ?? -L worker.jl -e "main()" +``` + +where `main()` is the function defined in `worker.jl` to be executed on the main node. + +A complete example can be seen in [`juliaset_p.jl`](juliaset_p.jl). + +## Multi-threadding + +So far, we have been able to decrese the computation from 39ms to something like 13ms. Can we improve? Let's now turn our attention to multi-threadding, where we will not pay the penalty for IPC. Moreover, the computation of Julia set is multi-thread friendly, as all the memory can be pre-allocatted. We slightly modify our code to accept different methods distributing the work among slices in the pre-allocated matrix. To start Julia with support of multi-threadding, run it with `julia -t n`, where `n` is the number of threads. It is reccomended to set `n` to number of physical cores, since in hyper-threadding two threads shares arithmetic units of a single core, and in applications for which Julia was built, they are usually saturated. 
+ +```julia +using BenchmarkTools +function juliaset_pixel(z₀, c) + z = z₀ + for i in 1:255 + abs2(z)> 4.0 && return (i - 1)%UInt8 + z = z*z + c + end + return UInt8(255) +end + +function juliaset_column!(img, c, n, j) + x = -2.0 + (j-1)*4.0/(n-1) + for i in 1:n + y = -2.0 + (i-1)*4.0/(n-1) + @inbounds img[i,j] = juliaset_pixel(x+im*y, c) + end + nothing +end + +function juliaset(x, y, n=1000) + c = x + y*im + img = Array{UInt8,2}(undef,n,n) + for j in 1:n + juliaset_column!(img, c, n, j) + end + return img +end +``` + +```julia +julia> @btime juliaset(-0.79, 0.15, 1000); + 38.932 ms (2 allocations: 976.67 KiB) +``` + +Let's now try to speed-up the calculation using multi-threadding. `Julia v0.5` has introduced multi-threadding with static-scheduller with a simple syntax: just prepend the for-loop with a `Threads.@threads` macro. With that, the first multi-threaded version will looks like + +```julia +function juliaset_static(x, y, n=1000) + c = x + y*im + img = Array{UInt8,2}(undef,n,n) + Threads.@threads :static for j in 1:n + juliaset_column!(img, c, n, j) + end + return img +end +``` + +with benchmark + +```julia +julia> @btime juliaset_static(-0.79, 0.15, 1000); + 15.751 ms (27 allocations: 978.75 KiB) +``` + +Although we have used four-threads, and the communication overhead should be next to zero, the speed improvement is ``2.4``. Why is that? + +To understand bettern what is going on, we have improved the profiler we have been developing last week. The logging profiler logs time of entering and exitting every function call of every thread, which is useful to understand, what is going on. The api is not yet polished, but it will do its job. Importantly, to prevent excessive logging, we ask to log only some functions. 
+ +```julia +using LoggingProfiler +function juliaset_static(x, y, n=1000) + c = x + y*im + img = Array{UInt8,2}(undef,n,n) + Threads.@threads :dynamic for j in 1:n + LoggingProfiler.@recordfun juliaset_column!(img, c, n, j) + end + return img +end + +LoggingProfiler.initbuffer!(1000) +juliaset_static(-0.79, 0.15, 1000); +LoggingProfiler.recorded() +LoggingProfiler.adjustbuffer!() +juliaset_static(-0.79, 0.15, 1000) +LoggingProfiler.export2svg("/tmp/profile.svg") +LoggingProfiler.export2luxor("profile.png") +``` + +![profile.png](profile.png) + +From the visualization of the profiler we can see not all threads were working the same time. Thread 1 and 4 were working less that Thread 2 and 3. The reason is that the static scheduller partition the total number of columns (1000) into equal parts, where the total number of parts is equal to the number of threads, and assign each to a single thread. In our case, we will have four parts each of size 250. Since execution time of computing value of each pixel is not the same, threads with a lot zero iterations will finish considerably faster. This is the incarnation of one of the biggest problems in multi-threadding / schedulling. A contemprary approach is to switch to dynamic schedulling, which divides the problem into smaller parts, and when a thread is finished with one part, it assigned new not-yet computed part. + +From 1.5, one can specify the scheduller for `Threads.@thread [scheduller] for` construct to be either `:static` and / or `:dynamic`. The `:dynamic` is compatible with the `partr` dynamic scheduller. From `1.8`, `:dynamic` is default, but the range is dividided into `nthreads()` parts, which is the reason why we do not see an improvement. + +Dynamic scheduller is also supported using by `Threads.@spawn` macro. 
The prototypical approach used for invocation is the fork-join model, where one recursively partitions the problem and waits in each thread for the others + +```julia +function juliaset_recspawn!(img, c, n, lo=1, hi=n, ntasks=128) + if hi - lo > n/ntasks-1 + mid = (lo+hi)>>>1 + finish = Threads.@spawn juliaset_recspawn!(img, c, n, lo, mid, ntasks) + juliaset_recspawn!(img, c, n, mid+1, hi, ntasks) + wait(finish) + return + end + for j in lo:hi + juliaset_column!(img, c, n, j) + end + nothing +end +``` + +Measuring the time we observe a four-fold speedup, which corresponds to the number of threads. + +```julia +function juliaset_forkjoin(x, y, n=1000, ntasks = 16) + c = x + y*im + img = Array{UInt8,2}(undef,n,n) + juliaset_recspawn!(img, c, n, 1, n, ntasks) + return img +end +``` + +```julia +julia> @btime juliaset_forkjoin(-0.79, 0.15); + 10.326 ms (142 allocations: 986.83 KiB) +``` + +This is so far our fastest construction with speedup `38.932 / 10.326 = 3.77×`. + +Unfortunately, the `LoggingProfiler` does not handle task migration at the moment, which means that we cannot visualize the results. Due to task switching overhead, increasing the granularity might not pay off.
+ +```julia +4 tasks: 16.262 ms (21 allocations: 978.05 KiB) +8 tasks: 10.660 ms (45 allocations: 979.80 KiB) +16 tasks: 10.326 ms (142 allocations: 986.83 KiB) +32 tasks: 10.786 ms (238 allocations: 993.83 KiB) +64 tasks: 10.211 ms (624 allocations: 1021.89 KiB) +128 tasks: 10.224 ms (1391 allocations: 1.05 MiB) +256 tasks: 10.617 ms (2927 allocations: 1.16 MiB) +512 tasks: 11.012 ms (5999 allocations: 1.38 MiB) +``` + +```julia +using FLoops, FoldsThreads +function juliaset_folds(x, y, n=1000, basesize = 2) + c = x + y*im + img = Array{UInt8,2}(undef,n,n) + @floop ThreadedEx(basesize = basesize) for j in 1:n + juliaset_column!(img, c, n, j) + end + return img +end +``` + +```julia +julia> @btime juliaset_folds(-0.79, 0.15, 1000); + 10.253 ms (3960 allocations: 1.24 MiB) +``` + +where `basesize` is the size of the smallest part allocated to a single thread, in this case 2 columns. + +```julia +julia> @btime juliaset_folds(-0.79, 0.15, 1000); + 10.575 ms (52 allocations: 980.12 KiB) +``` + +```julia +function juliaset_folds(x, y, n=1000, basesize = 2) + c = x + y*im + img = Array{UInt8,2}(undef,n,n) + @floop DepthFirstEx(basesize = basesize) for j in 1:n + juliaset_column!(img, c, n, j) + end + return img +end +``` + +```julia +julia> @btime juliaset_folds(-0.79, 0.15, 1000); + 10.421 ms (3582 allocations: 1.20 MiB) +``` + +We can identify the best smallest size of the work `basesize` and measure its influence on the time + +```julia +map(2 .^ (0:7)) do bs + t = @belapsed juliaset_folds(-0.79, 0.15, 1000, $(bs)); + (;basesize = bs, time = t) +end |> DataFrame +``` + +```julia + Row │ basesize time + │ Int64 Float64 +─────┼───────────────────── + 1 │ 1 0.0106803 + 2 │ 2 0.010267 + 3 │ 4 0.0103081 + 4 │ 8 0.0101652 + 5 │ 16 0.0100204 + 6 │ 32 0.0100097 + 7 │ 64 0.0103293 + 8 │ 128 0.0105411 +``` + +We observe that the minimum is for `basesize = 32`, for which we got `3.8932×` speedup. 
+ +## Garbage collector is single-threaded + +Keep in mind that while threads are very easy and convenient to use, there are use-cases where you might be better off with processes, even though there will be some communication overhead. One such case happens when you need to allocate and free a lot of memory. This is because Julia's garbage collector is single-threaded (in 1.10 it is now partially multi-threaded). Imagine a task of making a histogram of bytes in a directory. +For a fair comparison, we will use `Transducers`, since they offer thread- and process-based parallelism + +```julia +using Transducers +@everywhere begin + function histfile(filename) + h = Dict{UInt8,Int}() + foreach(open(read, filename, "r")) do b + h[b] = get(h, b, 0) + 1 + end + h + end +end + +files = filter(isfile, readdir("/Users/tomas.pevny/Downloads/", join = true)) +@elapsed foldxd(mergewith(+), files |> Map(histfile)) +150.863183701 +``` + +and using the multi-threaded version of `map` + +```julia +@elapsed foldxt(mergewith(+), files |> Map(histfile)) +205.309952618 +``` + +we see that the threading is actually worse than process-based parallelism despite us paying the price for serialization and deserialization of `Dict`. Needless to say that changing `Dict` to `Vector` as + +```julia +using Transducers +@everywhere begin + function histfile(filename) + h = Dict{UInt8,Int}() + foreach(open(read, filename, "r")) do b + h[b] = get(h, b, 0) + 1 + end + h + end +end +``` + +```julia +julia> files = filter(isfile, readdir("/Users/tomas.pevny/Downloads/", join = true)) + +julia> @elapsed foldxd(mergewith(+), files |> Map(histfile)) +86.44577969 + +julia> @elapsed foldxt(mergewith(+), files |> Map(histfile)) +105.32969331 +``` + +is much better. + +## Locks / lock-free multi-threading + +Avoid locks.
+ +## Take away message + +When deciding what kind of parallelism to employ, consider the following + +- for tightly coupled computation over shared data, multi-threading is more suitable, since there is no sharing of data between processes +- but if the computation requires frequent allocation and freeing of memory, or IO, separate processes are more suitable, since garbage collectors are independent between processes +- Making all cores busy while achieving an ideally linear speedup is difficult and needs a lot of experience and knowledge. Tooling and profilers supporting debugging of parallel processes is not much developed. +- `Transducers` strives for (almost) the same code to support thread- and process-based parallelism. + +### Materials + +- [http://cecileane.github.io/computingtools/pages/notes1209.html](http://cecileane.github.io/computingtools/pages/notes1209.html) +- [https://lucris.lub.lu.se/ws/portalfiles/portal/61129522/julia_parallel.pdf](https://lucris.lub.lu.se/ws/portalfiles/portal/61129522/julia_parallel.pdf) +- [http://igoro.com/archive/gallery-of-processor-cache-effects/](http://igoro.com/archive/gallery-of-processor-cache-effects/) +- [https://www.csd.uwo.ca/~mmorenom/cs2101a_moreno/Parallel_computing_with_Julia.pdf](https://www.csd.uwo.ca/~mmorenom/cs2101a_moreno/Parallel_computing_with_Julia.pdf) +- Complexity of thread scheduling [https://www.youtube.com/watch?v=YdiZa0Y3F3c](https://www.youtube.com/watch?v=YdiZa0Y3F3c) +- TapIR --- Teaching parallelism to Julia compiler [https://www.youtube.com/watch?v=-JyK5Xpk7jE](https://www.youtube.com/watch?v=-JyK5Xpk7jE) +- Threads: [https://juliahighperformance.com/code/Chapter09.html](https://juliahighperformance.com/code/Chapter09.html) +- Processes: [https://juliahighperformance.com/code/Chapter10.html](https://juliahighperformance.com/code/Chapter10.html) +- Alan Adelman uses FLoops in [https://www.youtube.com/watch?v=dczkYlOM2sg](https://www.youtube.com/watch?v=dczkYlOM2sg) +- Examples: ?Heat
equation? from [https://hpc.llnl.gov/training/tutorials/](introduction-parallel-computing-tutorial#Examples(https://hpc.llnl.gov/training/tutorials/) \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_10/pkg_processing.jl b/docs_vitepress/src/lectures/lecture_10/pkg_processing.jl new file mode 100644 index 00000000..0c3c7008 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_10/pkg_processing.jl @@ -0,0 +1,45 @@ +""" + sample_all_installed_pkgs(path::AbstractString) + +Returns root folders of all installed packages in the system. Package version is sampled. +""" +function sample_all_installed_pkgs(path::AbstractString) + pkgs = readdir(path) + # [rand(readdir(joinpath(path, p), join=true)) for p in pkgs] # sampling version + [readdir(joinpath(path, p), join=true)[1] for p in pkgs if isdir(joinpath(path, p))] # deterministic version +end + +""" + filter_jl(path) + +Recursively walks the directory structure to obtain all `.jl` files. +""" +filter_jl(path) = reduce(vcat, joinpath.(rootpath, filter(endswith(".jl"), files)) for (rootpath, dirs, files) in walkdir(path)) + +""" + tokenize(jl_path) + +Parses a ".jl" file located at `jl_path` and extracts all symbols and expression heads from the extracted AST. 
+""" +function tokenize(jl_path) + _extract_symbols(x) = Symbol[] + _extract_symbols(x::Symbol) = [x] + function _extract_symbols(x::Expr) + if length(x.args) > 0 + Symbol.(vcat(x.head, reduce(vcat, _extract_symbols(arg) for arg in x.args))) + else + Symbol[] + end + end + + scode = "begin\n" * read(jl_path, String) * "end\n" + try + code = Meta.parse(scode) + _extract_symbols(code) + catch e + if ~isa(e, Meta.ParseError) + rethrow(e) + end + Symbol[] + end +end diff --git a/docs_vitepress/src/lectures/lecture_10/profile.png b/docs_vitepress/src/lectures/lecture_10/profile.png new file mode 100644 index 00000000..76306921 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_10/profile.png differ diff --git a/docs_vitepress/src/lectures/lecture_11/Intel_Core2.png b/docs_vitepress/src/lectures/lecture_11/Intel_Core2.png new file mode 100644 index 00000000..bedb82ea Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_11/Intel_Core2.png differ diff --git a/docs_vitepress/src/lectures/lecture_11/Manifest.toml b/docs_vitepress/src/lectures/lecture_11/Manifest.toml new file mode 100644 index 00000000..295200dc --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_11/Manifest.toml @@ -0,0 +1,408 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.9.4" +manifest_format = "2.0" +project_hash = "a2640927cba4ebbc7204bebc0951783914229991" + +[[deps.Adapt]] +deps = ["LinearAlgebra", "Requires"] +git-tree-sha1 = "cde29ddf7e5726c9fb511f340244ea3481267608" +uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +version = "3.7.2" +weakdeps = ["StaticArrays"] + + [deps.Adapt.extensions] + AdaptStaticArraysExt = "StaticArrays" + +[[deps.ArgTools]] +uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" +version = "1.1.1" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" + +[[deps.Atomix]] +deps = ["UnsafeAtomics"] +git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be" +uuid = 
"a9b6321e-bd34-4604-b9c9-b65b8de01458" +version = "0.1.0" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[deps.Bzip2_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "19a35467a82e236ff51bc17a3a44b69ef35185a2" +uuid = "6e34b625-4abd-537c-b88f-471c36dfa7a0" +version = "1.0.8+0" + +[[deps.CEnum]] +git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90" +uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" +version = "0.4.2" + +[[deps.CompilerSupportLibraries_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" +version = "1.0.5+0" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[deps.Downloads]] +deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] +uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" +version = "1.6.0" + +[[deps.Expat_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "4558ab818dcceaab612d1bb8c19cee87eda2b83c" +uuid = "2e619515-83b5-522b-bb60-26c02a35a201" +version = "2.5.0+0" + +[[deps.ExprTools]] +git-tree-sha1 = "27415f162e6028e81c72b82ef756bf321213b6ec" +uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" +version = "0.1.10" + +[[deps.FileWatching]] +uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" + +[[deps.GPUArrays]] +deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] +git-tree-sha1 = "85d7fb51afb3def5dcb85ad31c3707795c8bccc1" +uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" +version = "9.1.0" + +[[deps.GPUArraysCore]] +deps = ["Adapt"] +git-tree-sha1 = "2d6ca471a6c7b536127afccfa7564b5b39227fe0" +uuid = "46192b85-c4d5-4398-a991-12ede77f4527" +version = "0.1.5" + +[[deps.GPUCompiler]] +deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "Scratch", "TimerOutputs", "UUIDs"] +git-tree-sha1 = "5e4487558477f191c043166f8301dd0b4be4e2b2" +uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" +version = "0.24.5" + +[[deps.InteractiveUtils]] +deps 
= ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.5.0" + +[[deps.KernelAbstractions]] +deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "PrecompileTools", "Requires", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"] +git-tree-sha1 = "653e0824fc9ab55b3beec67a6dbbe514a65fb954" +uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +version = "0.9.15" + + [deps.KernelAbstractions.extensions] + EnzymeExt = "EnzymeCore" + + [deps.KernelAbstractions.weakdeps] + EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" + +[[deps.LLVM]] +deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Preferences", "Printf", "Requires", "Unicode"] +git-tree-sha1 = "0678579657515e88b6632a3a482d39adcbb80445" +uuid = "929cbde3-209d-540e-8aea-75f648917ca0" +version = "6.4.1" + + [deps.LLVM.extensions] + BFloat16sExt = "BFloat16s" + + [deps.LLVM.weakdeps] + BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" + +[[deps.LLVMExtra_jll]] +deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] +git-tree-sha1 = "98eaee04d96d973e79c25d49167668c5c8fb50e2" +uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" +version = "0.0.27+1" + +[[deps.LazyArtifacts]] +deps = ["Artifacts", "Pkg"] +uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" + +[[deps.LibCURL]] +deps = ["LibCURL_jll", "MozillaCACerts_jll"] +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.6.4" + +[[deps.LibCURL_jll]] +deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] +uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" +version = "8.4.0+0" + +[[deps.LibGit2]] +deps = ["Base64", "NetworkOptions", "Printf", "SHA"] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[deps.LibMPDec_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = 
"6eaa22a233f28bc5d6092f3f8e685f85080fba11" +uuid = "7106de7a-f406-5ef1-84f7-3345f7341bd2" +version = "2.5.1+0" + +[[deps.LibSSH2_jll]] +deps = ["Artifacts", "Libdl", "MbedTLS_jll"] +uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" +version = "1.11.0+1" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[deps.Libffi_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] +git-tree-sha1 = "0b4a5d71f3e5200a7dff793393e09dfc2d874290" +uuid = "e9f186c6-92d2-5b65-8a66-fee21dc1b490" +version = "3.2.2+1" + +[[deps.LinearAlgebra]] +deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[deps.MacroTools]] +deps = ["Markdown", "Random"] +git-tree-sha1 = "b211c553c199c111d998ecdaf7623d1b89b69f93" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.5.12" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.2+0" + +[[deps.Metal]] +deps = ["Adapt", "Artifacts", "CEnum", "ExprTools", "GPUArrays", "GPUCompiler", "KernelAbstractions", "LLVM", "LinearAlgebra", "Metal_LLVM_Tools_jll", "ObjectFile", "ObjectiveC", "Printf", "Python_jll", "Random", "Reexport", "Requires", "StaticArrays"] +git-tree-sha1 = "b696f1ad8bab7c53e022c0c3c226ed0f7ec2015e" +uuid = "dde4c033-4e86-420c-a63e-0dd931031962" +version = "0.5.1" + + [deps.Metal.extensions] + SpecialFunctionsExt = "SpecialFunctions" + + [deps.Metal.weakdeps] + SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" + +[[deps.Metal_LLVM_Tools_jll]] +deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML", "Zlib_jll"] +git-tree-sha1 = "7fb1688d2e08c6e08840b41d9d46510f105b20e6" +uuid = "0418c028-ff8c-56b8-a53e-0f9676ed36fc" +version = "0.5.1+0" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = 
"2022.10.11" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.2.0" + +[[deps.ObjectFile]] +deps = ["Reexport", "StructIO"] +git-tree-sha1 = "195e0a19842f678dd3473ceafbe9d82dfacc583c" +uuid = "d8793406-e978-5875-9003-1fc021f44a92" +version = "0.4.1" + +[[deps.ObjectiveC]] +deps = ["CEnum", "Preferences"] +git-tree-sha1 = "9abcf85a7e05283fdac7fa0b2d46511f35c875ee" +uuid = "e86c9b32-1129-44ac-8ea0-90d5bb39ded9" +version = "1.1.0" + +[[deps.OpenBLAS_jll]] +deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] +uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" +version = "0.3.21+4" + +[[deps.OpenSSL_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "a12e56c72edee3ce6b96667745e6cbbe5498f200" +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "1.1.23+0" + +[[deps.Pkg]] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +version = "1.9.2" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "03b4c25b43cb84cee5c90aa9b5ea0a78fd848d2f" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.2.0" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "00805cd429dcb4870060ff49ef443486c262e38e" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.4.1" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[deps.Python_jll]] +deps = ["Artifacts", "Bzip2_jll", "Expat_jll", "JLLWrappers", "LibMPDec_jll", "Libdl", "Libffi_jll", "OpenSSL_jll", "Pkg", "SQLite_jll", "XZ_jll", "Zlib_jll"] +git-tree-sha1 = "07aa31a2eeea4e93d1ce92696dc64fb76a7f632c" +uuid = "93d3a430-8e7c-50da-8e8d-3dfcfb3baf05" +version = "3.10.8+1" + +[[deps.REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[deps.Random]] +deps = 
["SHA", "Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[deps.Reexport]] +git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" +uuid = "189a3867-3050-52da-a836-e630ba90ab69" +version = "1.2.2" + +[[deps.Requires]] +deps = ["UUIDs"] +git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "1.3.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.SQLite_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl", "Zlib_jll"] +git-tree-sha1 = "81f7d934b52b2441f7b44520bd982fdb3607b0da" +uuid = "76ed43ae-9a5d-5a62-8c75-30186b810ce8" +version = "3.43.0+0" + +[[deps.Scratch]] +deps = ["Dates"] +git-tree-sha1 = "3bac05bc7e74a75fd9cba4295cde4045d9fe2386" +uuid = "6c6a2e73-6563-6170-7368-637461726353" +version = "1.2.1" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[deps.SparseArrays]] +deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[deps.StaticArrays]] +deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] +git-tree-sha1 = "fba11dbe2562eecdfcac49a05246af09ee64d055" +uuid = "90137ffa-7385-5640-81b9-e52037218182" +version = "1.8.1" + + [deps.StaticArrays.extensions] + StaticArraysChainRulesCoreExt = "ChainRulesCore" + StaticArraysStatisticsExt = "Statistics" + + [deps.StaticArrays.weakdeps] + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[deps.StaticArraysCore]] +git-tree-sha1 = "36b3d696ce6366023a0ea192b4cd442268995a0d" +uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" +version = "1.4.2" + +[[deps.Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +version = "1.9.0" + +[[deps.StructIO]] +deps = ["Test"] +git-tree-sha1 = 
"010dc73c7146869c042b49adcdb6bf528c12e859" +uuid = "53d494c1-5632-5724-8f4c-31dff12d585f" +version = "0.3.0" + +[[deps.SuiteSparse_jll]] +deps = ["Artifacts", "Libdl", "Pkg", "libblastrampoline_jll"] +uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" +version = "5.10.1+6" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.Tar]] +deps = ["ArgTools", "SHA"] +uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +version = "1.10.0" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[deps.TimerOutputs]] +deps = ["ExprTools", "Printf"] +git-tree-sha1 = "f548a9e9c490030e545f72074a41edfd0e5bcdd7" +uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +version = "0.5.23" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[deps.UnsafeAtomics]] +git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278" +uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f" +version = "0.2.1" + +[[deps.UnsafeAtomicsLLVM]] +deps = ["LLVM", "UnsafeAtomics"] +git-tree-sha1 = "323e3d0acf5e78a56dfae7bd8928c989b4f3083e" +uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249" +version = "0.1.3" + +[[deps.XZ_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "522b8414d40c4cbbab8dee346ac3a09f9768f25d" +uuid = "ffd25f8a-64ca-5728-b0f7-c24cf3aae800" +version = "5.4.5+0" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.2.13+0" + +[[deps.libblastrampoline_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" +version = "5.8.0+0" + +[[deps.nghttp2_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" +version = "1.52.0+1" + +[[deps.p7zip_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" +version = "17.4.0+0" diff --git 
a/docs_vitepress/src/lectures/lecture_11/Project.toml b/docs_vitepress/src/lectures/lecture_11/Project.toml new file mode 100644 index 00000000..bda0aba3 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_11/Project.toml @@ -0,0 +1,3 @@ +[deps] +KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" +Metal = "dde4c033-4e86-420c-a63e-0dd931031962" diff --git a/docs_vitepress/src/lectures/lecture_11/cudanative.png b/docs_vitepress/src/lectures/lecture_11/cudanative.png new file mode 100644 index 00000000..0be8db39 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_11/cudanative.png differ diff --git a/docs_vitepress/src/lectures/lecture_11/ffnn.jl b/docs_vitepress/src/lectures/lecture_11/ffnn.jl new file mode 100644 index 00000000..2ab40bcb --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_11/ffnn.jl @@ -0,0 +1,204 @@ +using GLMakie +function flower(n; npetals = 8) + n = div(n, npetals) + x = mapreduce(hcat, (1:npetals) .* (2π/npetals)) do θ + ct = cos(θ) + st = sin(θ) + + x0 = tanh.(randn(1, n) .- 1) .+ 4.0 .+ 0.05.* randn(1, n) + y0 = randn(1, n) .* 0.3 + + x₁ = x0 * cos(θ) .- y0 * sin(θ) + x₂ = x0 * sin(θ) .+ y0 * cos(θ) + vcat(x₁, x₂) + end + _y = mapreduce(i -> fill(i, n), vcat, 1:npetals) + y = zeros(npetals, length(_y)) + foreach(i -> y[_y[i], i] = 1, 1:length(_y)) + Float32.(x), Float32.(y) +end + +x, y = flower(900) +scatter(x[1,:], x[2,:], color = mapslices(argmax, y, dims = 1)[:]) + +####### +# Define a Tracked Array for operator overloading AD +####### +struct TrackedArray{T,N,V<:AbstractArray{T,N}} <: AbstractArray{T,N} + value::V + grad::Union{Nothing,V} + tape::Vector{Any} +end + +TrackedArray(a::AbstractArray) = TrackedArray(a, similar(a) .= 0, []) +TrackedMatrix{T,V} = TrackedArray{T,2,V} where {T,V<:AbstractMatrix{T}} +TrackedVector{T,V} = TrackedArray{T,1,V} where {T,V<:AbstractVector{T}} +Base.size(a::TrackedArray) = size(a.value) +Base.show(io::IO, ::MIME"text/plain", a::TrackedArray) = show(io, a) +Base.show(io::IO, 
a::TrackedArray) = print(io, "TrackedArray($(size(a.value)))") +value(A::TrackedArray) = A.value +resetgrad!(A::TrackedArray) = (A.grad .= 0; empty!(A.tape)) +value(A) = A +track(A) = TrackedArray(A) +track(a::Number) = TrackedArray(reshape([a], 1, 1)) + +function accum!(A::TrackedArray) + isempty(A.tape) && return(A.grad) + A.grad .= sum(g(accum!(r)) for (r, g) in A.tape) + empty!(A.tape) + A.grad +end + +####### +# Define AD rules for few operations appearing in FFNN +####### +import Base: +, * +import Base.Broadcast: broadcasted +function *(A::TrackedMatrix, B::TrackedMatrix) + a, b = value.((A, B)) + C = track(a * b) + push!(A.tape, (C, Δ -> Δ * b')) + push!(B.tape, (C, Δ -> a' * Δ)) + C +end + +function *(A::TrackedMatrix, B::AbstractMatrix) + a, b = value.((A, B)) + C = track(a * b) + push!(A.tape, (C, Δ -> Δ * b')) + C +end + +function broadcasted(::typeof(+), A::TrackedMatrix, B::TrackedVector) + C = track(value(A) .+ value(B)) + push!(A.tape, (C, Δ -> Δ)) + push!(B.tape, (C, Δ -> sum(Δ, dims = 2)[:])) + C +end + +function σ(x::Real) + t = @fastmath exp(-abs(x)) + y = ifelse(x ≥ 0, inv(1 + t), t / (1 + t)) + ifelse(x > 40, one(y), ifelse(x < -80, zero(y), y)) +end + +broadcasted(::typeof(identity), A::TrackedArray) = A + +function broadcasted(::typeof(σ), A::TrackedArray) + Ω = σ.(value(A)) + C = track(Ω) + push!(A.tape, (C, Δ -> Δ .* Ω .* (1 .- Ω))) + C +end + +function mse(A::TrackedMatrix, B::AbstractMatrix) + n = size(A, 2) + a = value(A) + c = similar(a, 1, 1) + c .= sum((a .- B).^2)/2n + C = track(c) + push!(A.tape, (C, Δ -> Δ .* (a .- B) ./ n)) + C +end + +mse(x::AbstractMatrix, y::AbstractMatrix) = sum((x - y).^2) / (2*size(x,2)) + +####### +# Define a Dense layer +####### +struct Dense{F,W,B} + σ::F + w::W + b::B +end + +Base.show(io::IO, m::Dense) = print(io, "Dense($(size(m.w,2)) → $(size(m.w,1)))") +Dense(i::Int, o::Int, σ = identity) = Dense(σ, randn(Float32, o, i), randn(Float32, o)) +track(m::Dense) = Dense(m.σ, track(m.w), track(m.b)) 
+track(m::ComposedFunction) = track(m.outer) ∘ track(m.inner) +(m::Dense)(x) = m.σ.(m.w * x .+ m.b) +params(m::ComposedFunction) = vcat(params(m.outer), params(m.inner)) +params(m::Dense) = [m.w, m.b] + +####### +# Let's try to actually train a model +####### +x, y = flower(900) +function initmodel() + m₁ = track(Dense(2, 20, σ)) + m₂ = track(Dense(20, 20, σ)) + m₃ = track(Dense(20, size(y,1))) + m = m₃ ∘ m₂ ∘ m₁ +end +m = initmodel() +m(x) |> value + +###### +# Let's try to learn the parameters +###### +α = 0.01 +ps = params(m) +@elapsed for i in 1:10000 + foreach(resetgrad!, ps) + loss = mse(m(x), y) + fill!(loss.grad, 1) + foreach(accum!, ps) + foreach(x -> x.value .-= α .* x.grad, ps) + mod(i,250) == 0 && println("loss after $(i) iterations = ", sum(value(loss))) +end + +all(mapslices(argmax, value(m(x)), dims = 1)[:] .== mapslices(argmax, y, dims = 1)[:]) +scatter(x[1,:], x[2,:], color = mapslices(argmax, value(m(x)), dims = 1)[:]) + +###### +# Let's try to move the computation to GPU +###### +using CUDA +gpu(x::AbstractArray) = CuArray(x) +gpu(x::TrackedArray) = TrackedArray(CuArray(value(x))) +gpu(m::Dense) = Dense(m.σ, gpu(m.w), gpu(m.b)) +gpu(m::ComposedFunction) = gpu(m.outer) ∘ gpu(m.inner) + +gx, gy = gpu(x), gpu(y) +m = gpu(m) +ps = params(m) +@elapsed for i in 1:10000 + foreach(resetgrad!, ps) + loss = mse(m(gx), gy) + fill!(loss.grad, 1) + foreach(accum!, ps) + foreach(x -> x.value .-= α .* x.grad, ps) + mod(i,250) == 0 && println("loss after $(i) iterations = ", sum(value(loss))) +end + +####### +# Why we see a small speed-up? 
The problem is small +####### +using BenchmarkTools +p = randn(Float32, 20, 2) +@benchmark $(p) * $(x) +gp = gpu(p) +@benchmark $(gp) * $(gx) + + +###### +# Let's verify the gradients +###### +using FiniteDifferences +ps = [m₃.w, m₃.b, m₂.w, m₂.b, m₁.w, m₁.b] +map(ps) do p + foreach(resetgrad!, ps) + loss = mse(m(x), y) + fill!(loss.grad, 1) + foreach(accum!, ps) + accum!(p) + θ = deepcopy(value(p)) + Δθ = deepcopy(p.grad) + f = θ -> begin + p.value .= θ + value(mse(m(x), y)) + end + sum(abs2.(grad(central_fdm(5, 1), f, θ)[1] - Δθ)) +end + + diff --git a/docs_vitepress/src/lectures/lecture_11/grid_block_thread.png b/docs_vitepress/src/lectures/lecture_11/grid_block_thread.png new file mode 100644 index 00000000..63567d49 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_11/grid_block_thread.png differ diff --git a/docs_vitepress/src/lectures/lecture_11/lab.md b/docs_vitepress/src/lectures/lecture_11/lab.md new file mode 100644 index 00000000..8d0427f8 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_11/lab.md @@ -0,0 +1,830 @@ +# [Lab 11: GPU programming](@id gpu_lab) + +![](../../assets/julia-gpu-logo.png) + +In this lab we are going to delve into GPU acceleration. Julia offers a unified interface to the +four most important GPU vendors via four separate packages: + +- [CUDA.jl](https://cuda.juliagpu.org/stable/) +- [AMDGPU.jl](https://amdgpu.juliagpu.org/stable/) +- [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) (Intel's oneAPI toolkit) +- [Metal.jl](https://metal.juliagpu.org/stable/) (targets Apple's M-series chips) + + +::: tip + +[ Tim Besard - GPU Programming in Julia: What, Why and How? ](https://www.youtube.com/watch?v=Q8fj8QbVpZM) + +::: + +## Array programming + +We can do quite a lot without even knowing that we are using GPU instead of CPU. This marvel is the +combination of Julia's multiple dispatch and array abstractions. 
In many cases it will be enough to +move your CPU array to the appropriate GPU array: + +```julia +using MyGPUPackage + +a = rand(Float32, 1024) |> MyGPUArray + +sin.(a) |> sum +``` + +Based on the size of the problem and intricacy of the computation we may achieve both incredible +speedups as well as slowdowns. + +All four above mentioned packages have (almost) the same interfaces, which offer the following +user facing functionalities + +- device management `versioninfo`, `device!` +- definition of arrays on gpu (e.g. `CuArray` or `MtlArray`) +- data copying from host(CPU) to device(GPU) and the other way around +- wrapping already existing library code in `CuBLAS`, `CuRAND`, `CuDNN`, `CuSparse` and others +- kernel based programming (more on this in the second half of the lab) + +Let's use this to inspect our GPU hardware. + +```julia +julia> using Metal + +julia> Metal.versioninfo() +macOS 14.2.1, Darwin 23.2.0 + +Toolchain: +- Julia: 1.9.4 +- LLVM: 14.0.6 + +Julia packages: +- Metal.jl: 0.5.1 +- Metal_LLVM_Tools_jll: 0.5.1+0 + +1 device: +- Apple M1 Pro (2.608 GiB allocated) +``` + +As we have already seen in the [lecture](@ref gpu_lecture_no_kernel), we can simply import e.g. +`Metal.jl`, define some arrays, move them to the GPU and do some computation. In the following code +we define two matrices `1000x1000` filled with random numbers and multiply them using usual `x * y` +syntax. + +```julia +using Metal + +x = randn(Float32, 60, 60) +y = randn(Float32, 60, 60) + +mx = MtlArray(x) +my = MtlArray(y) + +@info "" x*y ≈ Matrix(mx*my) + +┌ Info: +└ x * y ≈ Matrix(mx * my) = true +``` + +This may not be anything remarkable, as such functionality is available in many other languages +albeit usually with a less mathematical notation like `x.dot(y)`. With Julia's multiple dispatch, we +can simply dispatch the multiplication operator/function `*` to a specific method that works on +`MtlArray` type. 
You can check with `@code_typed`: + +```julia +julia> @code_typed mx * my +CodeInfo( +1 ─ %1 = Base.getfield(A, :dims)::Tuple{Int64, Int64} +│ %2 = Base.getfield(%1, 1, true)::Int64 +│ %3 = Base.getfield(B, :dims)::Tuple{Int64, Int64} +│ %4 = Base.getfield(%3, 2, true)::Int64 +│ %5 = Core.tuple(%2, %4)::Tuple{Int64, Int64} +│ %6 = LinearAlgebra.similar::typeof(similar) +│ %7 = invoke Metal.:(var"#similar#30")($(QuoteNode(Metal.MTL.MTLResourceStorageModePrivate))::Metal.MTL.MTLResourceOptions, %6::typeof(similar), B::Metal.MtlMatrix{Float32, Metal.MTL.MTLResourceStorageModePrivate}, Float32::Type{Float32}, %5::Tuple{Int64, Int64})::Metal.MtlMatrix{Float32} +│ %8 = LinearAlgebra.gemm_wrapper!(%7, 'N', 'N', A, B, $(QuoteNode(LinearAlgebra.MulAddMul{true, true, Bool, Bool}(true, false))))::Metal.MtlMatrix{Float32} +└── return %8 +) => Metal.MtlMatrix{Float32} +``` + +Let's now explore what the we can do with this array programming paradigm on some practical examples. + + +::: warning Exercise + +Load a sufficiently large image to the GPU such as the one provided in the lab (anything >1Mpx +should be enough) and manipulate it in the following ways: +- create a negative +- halve the pixel brightness +- find the brightest pixels + +Measure the runtime difference with `BenchmarkTools`. Load the image with the following code, which +adds all the necessary dependencies and loads the image into Floa32 matrix. 
+ +```julia +# using Pkg; +# Pkg.add(["FileIO", "ImageMagick", "ImageShow", "ColorTypes"]) + +# using FileIO, ImageMagick, ImageShow, ColorTypes +# +# rgb_img = FileIO.load("image.jpeg"); +# gray_img = Float32.(Gray.(rgb_img)); +gray_img = rand(Float32, 10000, 10000) +cgray_img = MtlArray(gray_img) +``` + +**HINTS**: +- use `Float32` everywhere for better performance +- use `Metal.@sync` during benchmarking in order to ensure that the computation has completed + +::: + +::: danger Scalar indexing + +Some operations such as showing an image calls fallback implementation which requires +`getindex!` called from the CPU. As such it is incredibly slow and should be avoided. In order +to show the image use `Array(cimg)` to move it as a whole. Another option is to suppress the +output with semicolon + +```julia +julia> cimg +┌ Warning: Performing scalar indexing on task Task (runnable) @0x00007f25931b6380. +│ Invocation of getindex resulted in scalar indexing of a GPU array. +│ This is typically caused by calling an iterating implementation of a method. +│ Such implementations *do not* execute on the GPU, but very slowly on the CPU, +│ and therefore are only permitted from the REPL for prototyping purposes. +│ If you did intend to index this array, annotate the caller with @allowscalar. +└ @ GPUArrays ~/.julia/packages/GPUArrays/gkF6S/src/host/indexing.jl:56 +julia> Array(cimg) +Voila! 
+julia> cimg; +``` + +::: + +::: details Show solution + + +```julia +negative(i) = 1.0f0 .- i +darken(i) = i .* 0.5f0 +brightest(i) = findmax(i) +``` + +Benchmarking +```julia +julia> using BenchmarkTools + +julia> @btime Metal.@sync negative($cgray_img); + 53.253 ms (295 allocations: 7.68 KiB) + +julia> @btime negative($gray_img); + 37.857 ms (2 allocations: 381.47 MiB) + +julia> @btime Metal.@sync darken($cgray_img); + 52.056 ms (311 allocations: 7.99 KiB) + +julia> @btime darken($gray_img); + 39.182 ms (2 allocations: 381.47 MiB) + +julia> @btime Metal.@sync brightest($cgray_img); + 43.543 ms (1359 allocations: 34.91 KiB) + +julia> @btime brightest($gray_img); + 124.636 ms (0 allocations: 0 bytes) +``` + +::: + + +In the next example we will try to solve a system of linear equations $Ax=b$, where A is a large +(possibly sparse) matrix. + +::: warning Exercise + +Benchmark the solving of the following linear system with `N` equations and `N` unknowns. Experiment +with increasing `N` to find a value , from which the advantage of sending the matrix to GPU is +significant (include the time of sending the data to and from the device). For the sake of this +example significant means 2x speedup. At what point the memory requirements are incompatible with +your hardware, i.e. exceeding the memory of a GPU? + +```julia +α = 10.0f0 +β = 10.0f0 + +function init(N, α, β, r = (0.f0, π/2.0f0)) + dx = (r[2] - r[1]) / N + A = zeros(Float32, N+2, N+2) + A[1,1] = 1.0f0 + A[end,end] = 1.0f0 + for i in 2:N+1 + A[i,i-1] = 1.0f0/(dx*dx) + A[i,i] = -2.0f0/(dx*dx) - 16.0f0 + A[i,i+1] = 1.0f0/(dx*dx) + end + + b = fill(-8.0f0, N+2) + b[1] = α + b[end] = β + A, b +end + +N = 30 +A, b = init(N, α, β) +``` + +**HINTS**: +- use backslash operator `\` to solve the system +- use `CuArray` and `Array` for moving the date to and from device respectively +- use `CUDA.@sync` during benchmarking in order to ensure that the computation has completed + +**BONUS 1**: Visualize the solution `x`. 
What may be the origin of our linear system of equations? +**BONUS 2**: Use sparse matrix `A` to achieve the same thing. Can we exploit the structure of the matrix for a more effective solution? + +::: + +::: details Show solution + +```julia +A, b = init(N, α, β) +cA, cb = CuArray(A), CuArray(b) +A +b +A\b +cA\cb + +@btime $A \ $b; +@btime CUDA.@sync Array(CuArray($A) \ CuArray($b)); +``` + +**BONUS 1**: The system comes from a solution of a second-order ODE with *boundary conditions*. + +**BONUS 2**: The matrix is tridiagonal, therefore we don't have to store all the entries. + +::: + +Programming GPUs in this way is akin to using NumPy, MATLAB and other array based toolkits, which +force users not to use for loops. There are attempts to make GPU programming in Julia more powerful +without delving deeper into writing of GPU kernels. One of the attempts is +[`Tullio.jl`](https://github.com/mcabbott/Tullio.jl), which uses macros to annotate parallel for +loops, similar to [`OpenMP`](https://www.openmp.org/)'s `pragma` intrinsics, which can be compiled +to GPU as well. + +Note also that Julia's `CUDA.jl` is not a tensor compiler. With the exception of broadcast fusion, +which is easily transferable to GPUs, there is no optimization between different kernels from the +compiler point of view. Furthermore, memory allocations on GPU are handled by Julia's GC, which is +single threaded and often not as aggressive, therefore similar application code can have different +memory footprints on the GPU. + +Nowadays there is a big push towards simplifying programming of GPUs, mainly in the machine learning +community, which often requires switching between running on GPU/CPU to be a one click deal. However +this may not always yield the required results, because the GPU's computation model is different +from the CPU, see [lecture](@ref gpu_lecture). This being said e.g. 
Julia's `Flux.jl` framework does +offer such capabilities [^2] + +```julia +using Flux, CUDA +m = Dense(10,5) |> gpu +x = rand(10) |> gpu +y = m(x) +y |> cpu +``` + +[^2]: Taken from `Flux.jl` [documentation](https://fluxml.ai/Flux.jl/stable/guide/gpu/) + +## Kernel programming + +There are two paths that lead to the necessity of programming GPUs more directly via kernels + +1. We cannot express our algorithm in terms of array operations. +2. We want to get more out of the code, + +Note that the ability to write kernels in the language of your choice is not granted, as this club +includes a limited amount of members - C, C++, Fortran, Julia [^3]. Consider then the following +comparison between `CUDA C` and `CUDA.jl` implementation of a simple vector addition kernels as seen +in the [lecture](@ref gpu_lecture_yes_kernel). + +[^3]: There may be more of them, however these are the main ones. + +```c +#define cudaCall(err) // check return code for error +#define frand() (float)rand() / (float)(RAND_MAX) + +__global__ void vadd(const float *a, const float *b, float *c) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + c[i] = a[i] + b[i]; +} + +const int len = 100; +int main() { + float *a, *b; + a = new float[len]; + b = new float[len]; + for (int i = 0; i < len; i++) { + a[i] = frand(); b[i] = frand(); + } + float *d_a, *d_b, *d_c; + cudaCall(cudaMalloc(&d_a, len * sizeof(float))); + cudaCall(cudaMemcpy(d_a, a, len * sizeof(float), cudaMemcpyHostToDevice)); + cudaCall(cudaMalloc(&d_b, len * sizeof(float))); + + cudaCall(cudaMemcpy(d_b, b, len * sizeof(float), cudaMemcpyHostToDevice)); + cudaCall(cudaMalloc(&d_c, len * sizeof(float))); + + vadd<<<1, len>>>(d_a, d_b, d_c); + + float *c = new float[len]; + cudaCall(cudaMemcpy(c, d_c, len * sizeof(float), cudaMemcpyDeviceToHost)); + cudaCall(cudaFree(d_c)); + cudaCall(cudaFree(d_b)); + cudaCall(cudaFree(d_a)); + + return 0; +} +``` + +Compared to CUDA C the code is less bloated, while having the same functionality.[^4] 
+ +```julia +function vadd(a, b, c) + # CUDA.jl + # i = (blockIdx().x-1) * blockDim().x + threadIdx().x + + # Metal.jl + i = thread_position_in_grid_1d() + c[i] = a[i] + b[i] + + return +end + +len = 100 +a = rand(Float32, len) +b = rand(Float32, len) +d_a = MtlArray(a) +d_b = MtlArray(b) +d_c = similar(d_a) +@metal threads = len vadd(d_a, d_b, d_c) +c = Array(d_c) +``` + +You can check what instructions are implemented by your custom kernel via GPU specific introspection +macros: + +```julia +julia> @device_code_agx @metal threads = len vadd(d_a, d_b, d_c) +; GPUCompiler.CompilerJob{GPUCompiler.MetalCompilerTarget, Metal.MetalCompilerParams}(MethodInstance for vadd(::MtlDeviceVector{Float32, 1}, ::MtlDeviceVector{Float32, 1}, ::MtlDeviceVector{Float32, 1}), CompilerConfig for GPUCompiler.MetalCompilerTarget, 0x00000000000082f7) + +___Z4vadd14MtlDeviceArrayI7Float32Li1ELi1EES_IS0_Li1ELi1EES_IS0_Li1ELi1EE._agc.main: + 0: 0511100d00c43200 device_load 0, i32, xy, r2_r3, u0_u1, 1, signed, lsl 1 + 8: 3800 wait 0 + a: f2151004 get_sr r5.cache, sr80 (thread_position_in_grid.x) + e: 9204840200010150 icmpsel ugt, r1l.cache, r2.cache, 0, 1, 0 + 16: 92028602000101d0 icmpsel sgt, r0h.cache, r3.cache, 0, 1, 0 + 1e: 9202860200c2108c icmpsel seq, r0h.cache, r3.cache, 0, r1l.discard, r0h.discard + 26: 8e0501a028000000 iadd r1.cache, 1, r5.cache + 2e: 921981000000418c icmpsel seq, r6.cache, r0h.cache, 0, 0, r2.discard + 36: 9209c1000000618c icmpsel seq, r2.cache, r0h.discard, 0, 0, r3.discard + 3e: 920ccc2228010130 icmpsel ult, r3l.cache, r6.discard, r1.cache, 1, 0 + 46: 9202840200010130 icmpsel ult, r0h.cache, r2.cache, 0, 1, 0 + 4e: 9202c40200c6108c icmpsel seq, r0h.cache, r2.discard, 0, r3l.discard, r0h.discard + 56: 920842a22c010130 icmpsel ult, r2l.cache, r1, r5.discard, 1, 0 + 5e: 9202c10000c41090 icmpsel seq, r0h.cache, r0h.discard, 0, r2l.discard, 1 + 66: e2000000 mov_imm r0l.cache, 0 + 6a: 5288c1000000 if_icmp r0l, seq, r0h.discard, 0, 1 + 70: 20c0b6000000 jmp_exec_none 
0x126 + 76: 0511140d00c43200 device_load 0, i32, xy, r2_r3, u2_u3, 1, signed, lsl 1 + 7e: 3800 wait 0 + 80: 9210840200010150 icmpsel ugt, r4l.cache, r2.cache, 0, 1, 0 + 88: 92028602000101d0 icmpsel sgt, r0h.cache, r3.cache, 0, 1, 0 + 90: 9202860200c8108c icmpsel seq, r0h.cache, r3.cache, 0, r4l.discard, r0h.discard + 98: 921581000000418c icmpsel seq, r5.cache, r0h.cache, 0, 0, r2.discard + a0: 9209c1000000618c icmpsel seq, r2.cache, r0h.discard, 0, 0, r3.discard + a8: 920cca2224010130 icmpsel ult, r3l.cache, r5.discard, r1, 1, 0 + b0: 9202840200010130 icmpsel ult, r0h.cache, r2.cache, 0, 1, 0 + b8: 9202c40200c6108c icmpsel seq, r0h.cache, r2.discard, 0, r3l.discard, r0h.discard + c0: 9202c10000001190 icmpsel seq, r0h.cache, r0h.discard, 0, 0, 1 + c8: 5288c1000000 if_icmp r0l, seq, r0h.discard, 0, 1 + ce: 20c058000000 jmp_exec_none 0x126 + d4: 0511180d00c43200 device_load 0, i32, xy, r2_r3, u4_u5, 1, signed, lsl 1 + dc: 3800 wait 0 + de: 9210840200010150 icmpsel ugt, r4l.cache, r2.cache, 0, 1, 0 + e6: 92028602000101d0 icmpsel sgt, r0h.cache, r3.cache, 0, 1, 0 + ee: 9202860200c8108c icmpsel seq, r0h.cache, r3.cache, 0, r4l.discard, r0h.discard + f6: 921581000000418c icmpsel seq, r5.cache, r0h.cache, 0, 0, r2.discard + fe: 9209c1000000618c icmpsel seq, r2.cache, r0h.discard, 0, 0, r3.discard + 106: 9204ca222c010130 icmpsel ult, r1l.cache, r5.discard, r1.discard, 1, 0 + 10e: 9202840200010130 icmpsel ult, r0h.cache, r2.cache, 0, 1, 0 + 116: 9202c40200c2108c icmpsel seq, r0h.cache, r2.discard, 0, r1l.discard, r0h.discard + 11e: 1202c10000001190 icmpsel seq, r0h, r0h.discard, 0, 0, 1 + 126: 521600000000 pop_exec r0l, 2 + 12c: 721d1004 get_sr r7, sr80 (thread_position_in_grid.x) + 130: 0529000d00c43200 device_load 0, i32, xy, r5_r6, u0_u1, 0, signed, lsl 1 + 138: 0509040d00c43200 device_load 0, i32, xy, r1_r2, u2_u3, 0, signed, lsl 1 + 140: 3800 wait 0 + 142: 0519ea0400c01200 device_load 0, i32, x, r3, r5_r6, r7, signed + 14a: 0529e20400c01200 device_load 0, i32, x, r5, 
r1_r2, r7, signed + 152: 0509080d00c43200 device_load 0, i32, xy, r1_r2, u4_u5, 0, signed, lsl 1 + 15a: 3800 wait 0 + 15c: 2a8dc6a22c00 fadd32 r3, r3.discard, r5.discard + 162: 4519e20400c01200 device_store 0, i32, x, r3, r1_r2, r7, signed, 0 + 16a: 8800 stop +``` + +[^4]: This comparison is not fair to `CUDA C`, where memory management is left to the user and all the types have to be specified. However at the end of the day the choice of a high level language makes more sense as it offers the same functionality and is far more approachable. + +### CUDA programming model + +Recalling from the lecture, in CUDA's programming model, you usually write kernels, which represent +the body of some parallel for loop. + +- A kernel is executed on multiple threads, which are grouped into thread blocks. +- All threads in a block are executed in the same Streaming Multi-processor (SM), having access to + some shared pool of memory. +- The number of threads launched is always a multiple of 32 (32 threads = 1 warp, therefore length + of a thread block should be divisible by 32). +- All threads in a single warp are executed simultaneously. +- We have to take care of how many threads will be launched in order to complete the task at hand, + i.e. if there are insufficiently many threads/blocks spawned we may end up doing only part of the + task. +- We can spawn threads/thread blocks in both in 1D, 2D or 3D blocks, which may ease the indexing + inside the kernel when dealing with higher dimensional data. + +#### Thread indexing + +Stopping for a moment here to illustrate the last point with a visual aid[^5] +![grid_block_thread](./grid_block_thread.png) + +[^5]: The number of blocks to be run are given by the grid dimension. 
Image taken from http://tdesell.cs.und.edu/lectures/cuda_2.pdf + +This explains the indexing into a linear array from above + +```julia +i = (blockIdx().x-1) * blockDim().x + threadIdx().x +``` + +which is similar to the computation a linear index of multidimensional (in our case 2D array row ~ `blockIdx` and column `threadIdx`). Again let's use a visual help for this 1D vector[^6] +![thread_indexing](./thread_index.png) + +[^6]: Taken from [https://developer-blogs.nvidia.com/wp-content/uploads/2017/01/cuda\_indexing-1024x463.png](https://developer-blogs.nvidia.com/wp-content/uploads/2017/01/cuda_indexing-1024x463.png) + +#### Launching a kernel + +Let's now dig into what is happening during execution of the line `@cuda threads = (1, len) vadd(d_a, d_b, d_c)`: + +1. Compile the `vadd` kernel to GPU code (via LLVM and it's [NVPTX backend](https://www.llvm.org/docs/NVPTXUsage.html)) +2. Parse and construct launch configuration of the kernel. Here we are creating `1` thread block with `1x100` threads (in reality 128 threads may be launched). +3. Schedule to run `vadd` kernel with constructed launch configuration and arguments. +4. Return the task status. + +It's important to stress that we only schedule the kernel to run, however in order to get the result we have to first wait for the completion. This can be done either via + +- `CUDA.@sync`, which we have already seen earlier +- or a command to copy result to host (`Array(c)`), which always synchronizes kernels beforehand + +::: warning Exercise + +Fix the `vadd` kernel such that it can work with different launch configurations, such as + +```julia +@cuda threads=64 blocks=2 vadd(d_a, d_b, d_c) +@cuda threads=32 blocks=4 vadd(d_a, d_b, d_c) +``` + +Is there some performance difference? Try increasing the size and corresponding number of blocks to cover the larger arrays. + +What happens if we launch the kernel in the following way? 
+ +```julia +@cuda threads=32 blocks=2 vadd(d_a, d_b, d_c) +``` + +Write a wrapper function `vadd_wrap(a::CuArray, b::CuArray)` for `vadd` kernel, such that it spawns the right amount of threads and returns only when the kernel has finished. + +**HINTS**: + +- if you don't know what is wrong with the current implementation just try it, but be warned that you might need to restart Julia after that +- don't forget to use `CUDA.@sync` when benchmarking +- you can inspect the kernel with analogs of `@code_warntype` ~ `@device_code_warntype @cuda vadd(d_a, d_b, d_c)` +- lookup `cld` function for computing the number of blocks when launching kernels on variable sized input + +::: + +::: tip Wrapping kernels + +A usual pattern that you will see in GPU related code is that the kernel is written inside a function + +```julia +function do_something(a,b) + function do_something_kernel!(c,a,b) + ... + end + + # handle allocation + # handle launch configuration + @cuda ... do_something_kernel!(c,a,b) +end +``` + +Note that there are hardware limitations as to how many threads can be scheduled on a GPU. You can check it with the following code + +```julia +k = @cuda vadd(d_a, d_b, d_c) +CUDA.maxthreads(k) +``` + +::: + +::: details Show solution + +In order to fix the out of bounds accesses we need to add manual bounds check, otherwise we may run into some nice Julia crashes. +```julia +function vadd(a, b, c) + i = (blockIdx().x-1) * blockDim().x + threadIdx().x + if i <= length(c) + c[i] = a[i] + b[i] + end + return +end +``` + +Launching kernel with insufficient number of threads leads to only partial results. + +```julia +d_c = similar(d_a) +@cuda threads=32 blocks=2 vadd(d_a, d_b, d_c) # insufficient number of threads +Array(d_c) +``` + +Benchmarking different implementations shows that in this case running more threads per block may be beneficial, however only up to some point. 
+ +```julia +len = 10_000 +a = rand(Float32, len) +b = rand(Float32, len) +d_a = CuArray(a) +d_b = CuArray(b) +d_c = similar(d_a) + +julia> @btime CUDA.@sync @cuda threads=256 blocks=cld(len, 256) vadd($d_a, $d_b, $d_c) + @btime CUDA.@sync @cuda threads=128 blocks=cld(len, 128) vadd($d_a, $d_b, $d_c) + @btime CUDA.@sync @cuda threads=64 blocks=cld(len, 64) vadd($d_a, $d_b, $d_c) + @btime CUDA.@sync @cuda threads=32 blocks=cld(len, 32) vadd($d_a, $d_b, $d_c) + 8.447 μs (24 allocations: 1.22 KiB) + 8.433 μs (24 allocations: 1.22 KiB) + 8.550 μs (24 allocations: 1.22 KiB) + 8.634 μs (24 allocations: 1.22 KiB) +``` + +::: + +The launch configuration depends heavily on user's hardware and the actual computation in the kernel, where in some cases having more threads in a block is better (up to some point). + +### Image processing with kernels + +Following up on exercise with image processing let's use kernels for some functions that cannot be easily expressed as array operations. + +::: warning Exercise + +Implement `translate_kernel!(output, input, translation)`, which translates an image `input` in the direction of `translation` tuple (values given in pixels). The resulting image should be stored in `output`. Fill in the empty space with zeros. + +**HINTS**: +- use 2D grid of threads and blocks to simplify indexing +- check all sides of an image for out of bounds accesses + +**BONUS**: In a similar fashion you can create `scale_kernel!`, `rotate_kernel!` for scaling and rotation of an image. 
+ +::: + +::: details Show solution + +```julia +using CUDA +function translate_kernel!(output, input, translation) + x_idx = (blockIdx().x-1) * blockDim().x + threadIdx().x + y_idx = (blockIdx().y-1) * blockDim().y + threadIdx().y + + x_outidx = x_idx + translation[1] + y_outidx = y_idx + translation[2] + + if (1 <= x_outidx <= size(output,1)) && + (1 <= y_outidx <= size(output,2)) && + (x_idx <= size(output,1)) && (y_idx <= size(output,2)) + output[x_outidx, y_outidx] = input[x_idx, y_idx] + end + + return +end + +using FileIO, ImageMagick, ImageShow, ColorTypes +rgb_img = FileIO.load("tape.jpeg"); +gray_img = Float32.(Gray.(rgb_img)); +cgray_img = CuArray(gray_img); +cgray_img_moved = CUDA.fill(0.0f0, size(cgray_img)); + +blocks = cld.((size(cgray_img,1), size(cgray_img,2)), 32) +@cuda threads=(32, 32) blocks=blocks translate_kernel!(cgray_img_moved, cgray_img, (100, -100)) +Gray.(Array(cgray_img_moved)) + +#@cuda threads=(64, 64) blocks=(1,1) translate_kernel!(cgray_img_moved, cgray_img, (-500, 500)) # too many threads per block (fails on some weird exception) - CUDA error: invalid argument (code 1, ERROR_INVALID_VALUE) +``` + +::: + +### Profiling + +CUDA framework offers a wide variety of developer tooling for debugging and profiling our own kernels. In this section we will focus profiling using the Nsight Systems software that you can download after registering [here](https://developer.nvidia.com/nsight-systems). It contains both `nsys` profiler as well as `nsys-ui`GUI application for viewing the results. First we have to run `julia` using `nsys` application. 
+ +- on Windows with PowerShell (available on the lab computers) +```ps +& "C:\Program Files\NVIDIA Corporation\Nsight Systems 2021.2.4\target-windows-x64\nsys.exe" launch --trace=cuda,nvtx H:/Downloads/julia-1.6.3/bin/julia.exe --color=yes --color=yes --project=$((Get-Item .).FullName) +``` + +- on Linux +```bash +/full/path/to/nsys launch --trace=cuda,nvtx /home/honza/Apps/julia-1.6.5/bin/julia --color=yes --project=. +``` +Once `julia` starts we have to additionally (on the lab computers, where we cannot modify env path) instruct `CUDA.jl`, where `nsys.exe` is located. + +```julia +ENV["JULIA_CUDA_NSYS"] = "C:\\Program Files\\NVIDIA Corporation\\Nsight Systems 2021.2.4\\target-windows-x64\\nsys.exe" +``` + +Now we should be ready to start profiling our kernels. + +::: warning Exercise + +Choose a function/kernel out of previous exercises, in order to profile it. Use the `CUDA.@profile` macro the following patter to launch profiling of a block of code with `CUDA.jl` + +```julia +CUDA.@profile CUDA.@sync begin + NVTX.@range "something" begin + # run some kernel + end + + NVTX.@range "something" begin + # run some kernel + end +end + +``` +where `NVTX.@range "something"` is part of `CUDA.jl` as well and serves us to mark a piece of execution for better readability later. Inspect the result in `NSight Systems`. + +::: tip Profiling overhead + +It is recommended to run the code twice as shown above, because the first execution with profiler almost always takes longer, even after compilation of the kernel itself. 
+ +::: + +::: + +::: details Show solution + +In order to show multiple kernels running let's demonstrate profiling of the first image processing exercise + +```julia +CUDA.@profile CUDA.@sync begin + NVTX.@range "copy H2D" begin + rgb_img = FileIO.load("image.jpg"); + gray_img = Float32.(Gray.(rgb_img)); + cgray_img = CuArray(gray_img); + end + + NVTX.@range "negative" begin + negative(cgray_img); + end + NVTX.@range "darken" begin + darken(cgray_img); + end + NVTX.@range "fourier" begin + fourier(cgray_img); + end + NVTX.@range "brightest" begin + brightest(cgray_img); + end +end +``` + +Running this code should create a report in the current directory with the name `report-**.***`, which we can examine in `NSight Systems`. + +::: + +### Matrix multiplication + +::: warning Exercise + +Write a generic matrix multiplication `generic_matmatmul!(C, A, B)`, which wraps a GPU kernel inside. For simplicity assume that both `A` and `B` input matrices have only `Float32` elements. Benchmark your implementation against `CuBLAS`'s `mul!(C,A,B)`. + +**HINTS**: +- use 2D blocks for easier indexing +- import `LinearAlgebra` to be able to directly call `mul!` +- in order to avoid a headache with the choice of launch config use the following code + +```julia +max_threads = 256 + +threads_x = min(max_threads, size(C,1)) +threads_y = min(max_threads ÷ threads_x, size(C,2)) +threads = (threads_x, threads_y) +blocks = ceil.(Int, (size(C,1), size(C,2)) ./ threads) +``` + +::: + +::: details Show solution + +Adapted from the `CUDA.jl` source [code](https://github.com/JuliaGPU/CuArrays.jl/blob/cee6253edeca2029d8d0522a46e2cdbb638e0a50/src/matmul.jl#L4-L50). 
+ +```julia +function generic_matmatmul!(C, A, B) + function kernel(C, A, B) + i = (blockIdx().x-1) * blockDim().x + threadIdx().x + j = (blockIdx().y-1) * blockDim().y + threadIdx().y + + if i <= size(A,1) && j <= size(B,2) + Ctmp = 0.0f0 + for k in 1:size(A,2) + Ctmp += A[i, k]*B[k, j] + end + C[i,j] = Ctmp + end + + return + end + + max_threads = 256 + threads_x = min(max_threads, size(C,1)) + threads_y = min(max_threads ÷ threads_x, size(C,2)) + threads = (threads_x, threads_y) + blocks = ceil.(Int, (size(C,1), size(C,2)) ./ threads) + + @cuda threads=threads blocks=blocks kernel(C, A, B) + + C +end + +K, L, M = 10 .* (200, 100, 50) +A = CuArray(randn(K, L)); +B = CuArray(randn(L, M)); +C = similar(A, K, M); + +generic_matmatmul!(C, A, B) + +using LinearAlgebra +CC = similar(A, K, M) +mul!(CC, A, B) + + +using BenchmarkTools +@btime CUDA.@sync generic_matmatmul!(C, A, B); +@btime CUDA.@sync mul!(CC, A, B); +``` + +::: + +## GPU vendor agnostic code + +There is an interesting direction that is allowed with the high level abstraction of Julia - +[`KernelAbstractions.jl`](https://github.com/JuliaGPU/KernelAbstractions.jl), which offer an +overarching API over CUDA, AMD ROCM and Intel oneAPI frameworks. + +```julia +using KernelAbstractions + +# Simple kernel for matrix multiplication +@kernel function matmul_kernel!(a, b, c) + i, j = @index(Global, NTuple) + + # creating a temporary sum variable for matrix multiplication + tmp_sum = zero(eltype(c)) + for k = 1:size(a)[2] + tmp_sum += a[i,k] * b[k, j] + end + + c[i,j] = tmp_sum +end + +# Create a wrapper kernel which selects the correct backend +function matmul!(a, b, c) + backend = KernelAbstractions.get_backend(a) + kernel! 
= matmul_kernel!(backend) + kernel!(a, b, c, ndrange=size(c)) +end + +using Metal +a = rand(Float32, 1000, 1000) +b = rand(Float32, 1000, 1000) +ag = a |> MtlArray +bg = b |> MtlArray +c = similar(ag) +matmul!(ag,bg,c) + +@assert a*b ≈ Matrix(c) +``` + +::: warning Exercise + +Rewrite the `vadd` kernel with `KernelAbstractions.jl` + +::: + +::: details Show solution + +Fill out. + +::: \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_11/latency-hiding.jpg b/docs_vitepress/src/lectures/lecture_11/latency-hiding.jpg new file mode 100644 index 00000000..e53d431e Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_11/latency-hiding.jpg differ diff --git a/docs_vitepress/src/lectures/lecture_11/lecture.md b/docs_vitepress/src/lectures/lecture_11/lecture.md new file mode 100644 index 00000000..5297f935 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_11/lecture.md @@ -0,0 +1,683 @@ +# [GPU programming](@id gpu_lecture) + +## How GPU differs from CPU + +### Hardware perspective + +**CPU** was originally created for maximal throughput of a single threaded program. Therefore modern CPUs devote much of it's real estate towards maximizing utilization of a computing resource (arithmetic logic unit - ALU), which now occupies relatively small part of the die. Below is the picture of a processor of Intel's Core architecture (one of the earliest in the series). + +![cpu](Intel_Core2.png) +![cpu die](skylake_core_die.png) + +It contains blocks allowing to execute instructions in parallel, out of order, speculatively just to maximally utilize all computational units. +Notable functionalities / blocks are + +* **superscalar execution** is an ability to execute more than one instruction at the given time (see multiple ports to different units). + +* **Reorder buffer** reorders instructions in queue such that two non-interfering instructions can be executed simultaneously. 
+ +* **Register renaming** renames registers, such that two non-interfering instructions operating over the same register can be executed together. + +* **Branch prediction** predicts which branch will be taken and executes instructions in that branch. Misses are costly, as the processor needs to roll back emptying the instruction pipeline. The processor state is not fully restored, which may lead to side-channel attacks such as Spectre[^1]. + +* **speculative prefetching** loads instructions / data from memory to the processor in advance along the branch that is expected to be taken, in the hope they will be executed (depends on branch predictions) + +* **Memory management unit** is not shown but takes care of translation of virtual addresses to physical, checking the security bits of pages, etc. + +* **Caches** (three levels) strive to provide instructions with data from cache, such that it does not have to wait for the load. Caches are opaque to the user, who does not control what will stay in cache. + +* **L1 Cache synchronization** If the processor contains many cores, their L1 caches are atomically synchronized. + +* **Buffers** are used to cache for example mapping of virtual to physical addresses, partial computations to allow rollbacks. + +* **Interrupt management** CPU can interrupt its execution and transfer the execution to a different location, changing security levels. + +[^1]: Spectre side channel attack on Wikipedia. [Url](https://en.wikipedia.org/wiki/Spectre_(security_vulnerability)) + +**GPU** was from the very beginning designed for maximal throughput, primarily achieved by parallelism. The reason for this is simple. Imagine that you need to render a 4K image (resolution 3840 × 2160 = 8 294 400 pixels) with refresh rate 60fps in a first person shooter game. This means that you need to compute intensities of 0.5G pixels per second.
Leaving our some details, this program for computing the intensity of a pixel may look something like this + +```julia +for (i,j) in Iterators.Product(1:2160, 1:3840) + image[i,j] = compute_insity(i, j) +end +``` + +where the computation of intensities `compute_insity(i, j)` does not contain many branches. As a result GPUs have been designed for massive parallelism with each core being as simple as possible, leaving all difficulties up to the programmer / compiler. An illustration of a modern gpu architecture is show on the example of NVidia's GPU. + +![nvidia-gpu](nvidia-gpu.jpg) +![nvidia-gpu](nvidia-kepler.jpg) + +1. The chip contains many streaming multi-processors (SM). Normally, each streaming processor would be called core, as it is an indivisible unit, but NVidia decided to call "a core" a unit inside the streaming multi-processor performing stream of operations over a set of 32bit registers. + +2. Each streaming multi-processor contains (possibly multiple blocks of) 32-way (32-wide) SIMT units (NVidia calls that *CUDA Core*), shared memory (managed cache) and register file (16k registers, but shared among all threads). Therefore a pascal P-100 can have up to 64×4×32 = 8192 cores, which certainly sounds cool in comparison to for example 24 cores of normal processors, but given the architecture limitation we cannot expect proportional speedup. + +3. Each streaming multi-processors (SM) has one instruction fetch and decode unit, which means that *all* CUDA cores of that SM *has to* execute the same instruction at a given cycle. This simplifies the design. The execution model therefore roughly corresponds to vector (SIMD) registers in normal CPU, but CUDA cores are not as restricted as SIMD registers. NVidia therefore calls this computation model single instruction multiple threads (SIMT). Main differences: + 1. SIMD requires the memory to be continuous while SIMT does not. + 2. 
The programming model of SIMT is explicitly scalar while that of SIMD is explicitly vector. +32 CUDA cores each operating over 32bit registers would be equal to 1024bit long vector (SIMD) registers. Modern AMD and Intel processors has 256 bit / 512 bit long registers, which seems similar, as said above in order to use them the data has to be aligned in memory and sometimes they has to be on a particular offset, which might be difficult to achieve in practice (but to be fair, if the data are not aligned in GPU, the loading of data is very inefficient). + +4. 16k registers per SM might seem like a lot, but they are shared between all threads. In modern GPUs, each SM supports up to 2048 threads, which means there might be just 8 32-bit registers per thread. + +5. GPUs do not have virtual memory, interrupts, and cannot address external devices like keyboard and mouse. + +6. GPUs can switch execution contexts of "set of threads" at no cost. In comparison the context switch in CPU is relatively expensive (we need to at least save the content of registers, which is usually sped up by having two sets of registers). This helps to hide latencies, when a set of threads is stalled (they wait for memory access, synchronizing with others). + +7. The programmer deals with "raw" storage hierarchy. This means that we have to manage what will be and what will not be in cache, on GPU. + +8. Caches are synchronized only within a single SM, this is much simpler in comparison with CPU, where L1 caches are synchronized across cores, which presents a bottleneck when number of CPU cores increases. + +9. SM has relatively low frequency clocks, which helps to deal with thermal problems. + +10. The memory in SM is divided into 16 banks, write operations to a bank is sequential. If two threads are writing to the same bank, this write is sequential, therefore one thread(s) has to wait while the other finishes (stalling). 
+ +### Programming / Execution model + +The GPU works in an asynchronous mode. If we want to execute something on GPU, we need to + +1. Upload the data to GPU memory (if they are not already there) + +2. Compile the *kernel* --- a code to be executed on GPU (if not already compiled) + +3. Upload the kernel code to GPU + +4. Request the computation of the kernel (more on this below). GPU will put the request for the computation to its queue of works to be performed and once resources are free, it will do the compuation. + +5. The control is immediately returned, which means that CPU can continue doing its job. + +6. CPU can issue a blocking wait till the computation is done, for example fetching the results, sychronizing with other threads in the block. + +Let's now look at point 4. in more detail. +Recall that GPU is designed for processing data in parallel, something we can abstract as + +```julia +for i in 1:N + kernel!(result, i) +end +``` + +where `kernel!(result, i)` means compute the `i`-th item of the data using function `kernel!`, which is modifying as kernel function cannot return value, but has to put all results to the preallocated array. `i`-th part of the data usually corresponds to one float number (usually `Float32`). Here, we can see that SIMT has a scalar notation, we refer to individual numbers inside arrays. + +Each item, `kernel!(result, i)`, is executed on a single *thread*. GPU *always* execute 32 threads at once on a single SM. This group of 32 threads is called **warp**. These 32 threads within warp can very effectively communicate with each other using atomic instructions. A user has to group threads into **thread blocks**. Each block is executed on a single SM, which means that all threads within this group have access to fast SM local memory. All blocks of single job are called **grid**. + +* From the above we can already see that the number of threads has to be multiple of 32 (given by the requirement on warps). 
+ +* Each block can have up to 2048 threads, which are executed on a single SM. Large number of threads in a single block is good if + * those threads need to access the same memory (for example in Stencil operations), which can be put to the local cache and + * each thread reads data from memory (which are not coalesced) and the SM can run a different thread while the other is stalling (mostly due to waiting for finishing loading data from memory). +On the other hand large number of threads per group might stall due to insufficient number of registers and / or other threads being busy. + +* The total number of issued threads has to be multiple of 32 and of the number of threads per block, hence *there will almost always be threads that do not do anything* (unless the size of the job is aligned with the number of threads spawned). + +* The division of a total number of items to be processed `N` into blocks is therefore part of the problem and it can be specific to a version of GPU. + +* For some operations (e.g. reduction seen below) to get the highest performance, you need to write the same algorithm for three levels of sets of threads --- warp, groups, and grid. This is the price paid for exposed cache levels (we will see an example below on reduction). + +As has been mentioned above, all CUDA cores in one SM are processing the same instruction. Therefore, if the processed kernel contains conditional statements `if / else` blocks, both blocks will be processed in sequential order as illustrated below, + +![simt](simt-divergence.png) + +which can significantly decrease the throughput. + +### Latency hiding + +A thread can stall, because the instruction it depends on has not finished yet, for example due to loading data from memory, which can be very time consuming (recall that unlike SIMD, SIMT can read data from non-coalesced memory location at the price of increased latency).
Instead of waiting, SM will switch to execute different set of threads, which can be executed. This context switch does not incur any overhead, hence it can occur at single instruction granularity. It keeps SM busy effective hiding latency of expensive operations. + +![latency hiding](latency-hiding.jpg) +![image taken from](https://iq.opengenus.org/key-ideas-that-makes-graphics-processing-unit-gpu-so-fast/) + +## [using GPU without writing kernels](@id gpu_lecture_no_kernel) + +Julia, as many other languages, allows to perform certain operations on GPU as you would do on CPU. Thanks to Julia's multiple dispatch, this is almost invisible and it is sufficient to convert the `Array` to `CuArray` to notify the system that array is in GPU's memory. + +For many widely used operations, we have available kernels, for example below, we use multiplication. + +```julia +using CUDA +using BenchmarkTools + +x = randn(Float32, 1000, 1000) +y = randn(Float32, 1000, 1000) +x * y +cx = CuArray(x) +cy = CuArray(y) +cx * cy +x * y ≈ Matrix(cx * cy) +julia> @btime x * y; + 5.737 ms (2 allocations: 3.81 MiB) + +julia> @btime cx * cy; + 18.690 μs (32 allocations: 624 bytes) + +julia> @btime CUDA.@sync cx * cy; + 173.704 μs (32 allocations: 624 bytes) + +julia> @btime Matrix(CuArray(x) * CuArray(y)); + 1.651 ms (47 allocations: 3.82 MiB) +``` + +The matrix multiplication on GPU is about 33x faster, which is likely caused by being optimized by directly NVidia, as `cx * cy` calls `CuBlas` library. If we add the cost of sending and retrieving data from the memory, we have 3.5x speedup, but that is not fair. The goal is to compute everything on GPU. + +How much does it cost to send the matrix to GPU's memory? Let's measure the time of the roundtrip + +```julia +julia> @btime Matrix(CuMatrix(x)); + 1.059 ms (9 allocations: 3.81 MiB) +``` + +Since Julia is JAoT compiled, the set of operations you can do using Julia's kernels is relatively large. 
+ `CUDA.jl` implements generic `map` and `reduce` (albeit as we will see later, performant `reduce` operation is very difficult), which allows you to pass your function greatly extending the functionality. + +```julia +sin.(cx).^2 .+ cos.(cx).^2 +map(x -> sin(x)^2 + cos(x)^2, cx) +reduce(max, cx) +reduce(max, cx, dims = 1) +``` + +Notice that in this case, the function in `map` and in broadcasting is essentially a custom kernel. As such, the code within has to still obey (not precisely specified but clear for seasoned Julia programmers) rules on what can be executed as kernel. Also needless to say, that the generic `map` over `CuArray` will try to find good launch configuration (number of threads and number of blocks), which might not be ideal for your application. + +An example adapted from [Tim Besard's talk at JuliaCon 2023 in Eindhoven](https://www.youtube.com/watch?v=Q8fj8QbVpZM) + +```julia +l2(x,y) = sqrt(sum((x - y) .^2)) +l2b(x,y) = sqrt(sum((x .- y) .^2)) +l2m(x,y) = sqrt(mapreduce((xᵢ,yᵢ) -> (xᵢ - yᵢ)^2, +, x, y)) + +@btime CUDA.@sync l2(cx, cy) +@btime CUDA.@sync l2b(cx, cy) +@btime CUDA.@sync l2m(cx, cy) +``` + +Let's now try to use CUDA on computation of Julia set, which should benefit a lot from CUDA's parallelization, as we can dispatch each pixel to each thread --- something GPUs were originally designed for.
+ +We slightly modify the kernel we have used in our lecture on multi-threading, mainly to force all types to be 32-bit wide + +```julia +using CUDA +using BenchmarkTools + +function juliaset_pixel(i, j, n) + c = ComplexF32(-0.79f0, 0.15f0); + z = ComplexF32(-2f0 + (j-1)*4f0/(n-1), -2f0 + (i-1)*4f0/(n-1)) + for i in UnitRange{Int32}(0:255) + abs2(z)> 4.0 && return(i%UInt8) + z = z*z + c + end + return(i%UInt8) +end + +n = Int32(1000); +is = collect(Int32, 1:n)'; +js = collect(Int32, 1:n); +@btime img = juliaset_pixel.(is, js, n); + +cuis = CuArray(is); +cujs = CuArray(js); +img = juliaset_pixel.(cuis, cujs, n); +@btime CUDA.@sync juliaset_pixel.(cuis, cujs, n); +``` + +We see that the CPU version takes around 50ms while the GPU version takes about 64μs, which is **three** orders of magnitude faster. Notice that we have obtained this speedup almost for free without writing anything that would be GPU specific. Our `juliaset_pixel` is the same function working the same on CPU and on GPU. If we take into account moving the memory + +```julia +@btime Matrix(juliaset_pixel.(CuArray(is), CuArray(js), n)) +``` + +is about `315` μs, which is still 160x faster. + +::: tip Internal profiler + +`CUDA.jl` offers a simple profiler which shows how much time has been spent in CPU and in GPU. This profiler is easy to use, as it is shipped with `CUDA.jl` and can be immediately used, but the provided information is less detailed in comparison to NVIDIA's profilers shown below. + +The profiler can be called as + +```julia +CUDA.@profile CUDA.@sync juliaset_pixel.(cuis, cujs, n); +``` + +::: + +::: tip External profilers + +CUDA offers two profilers: NVIDIA Nsight Systems and NVIDIA Nsight Compute. The first is good to optimize the overall execution of your application, observing when the kernel is launched, delays in kernel launch, utilization of CPU and GPU, etc. The second is good for optimizing the single kernel.
Profilers are not shipped with `CUDA.jl` and you need to download them from NVidia's resources for developers [here](https://developer.nvidia.com/) after you create an account for free. + +To use the profiler, we need to launch julia within the profiler as for example `/opt/nvidia/nsight-systems/2021.5.1/bin/nsys launch --trace=cuda,nvtx /opt/julia-1.10.0-rc3/bin/julia --color=yes`. +and then, we can profile the code using the usual `@profile` macro this time sourced from `CUDA` as + +```julia +CUDA.@profile external=true CUDA.@sync juliaset_pixel.(cuis, cujs, n); +``` + +the report is saved to `report???.???` (nvidia likes to change the suffix) and it can be inspected by `nsys-ui` interactive tool. **Do not forget to run the profiler twice to get rid of compilation artifacts.** +You can further anotate parts of your code as + +```julia +CUDA.@profile external=true CUDA.@sync begin + NVTX.@range "julia set" juliaset_pixel.(cuis, cujs, n); +end +``` + +for better orientation in the code. Note that if nvtx information does not show up in the trace we have to add it to the tracing running the profiler with `--trace=cuda,nvtx`. [for more sophisticated example click here](profile_nn.jl) +Lastly it is recommended to run a kernel twice in a profile trace as the first execution of the kernel in a profiler incurs some overhead, even though the code has been already compiled. + +::: + +In the output of the profiler we see that there is a lot of overhead caused by launching the kernel itself and then, the execution is relatively fast. + +While Julia's JAoT greatly enhances the power of prepared kernels, you might quickly run into a case, when you are able to perform the operation on GPU, but it is very slow. Sometimes, it might be just faster to move the array to CPU, perform the operation there and move it back to GPU. Although this sounds like a pretty bad idea, it actually works very well see below. 
+ +```julia +using Mill +using Random +using CUDA +using BenchmarkTools + +n = vcat(rand(1:10,1000), rand(11:100, 100), rand(101:1000,10)) +x = randn(Float32, 128, sum(n)) +z = zeros(Float32, 128, 1) +bags = Mill.length2bags(n) + +builtin(x, bags, z) = Mill.segmented_sum_forw(x, vec(z), bags, nothing) + +function naive(x, bags, z) + o = similar(x, size(x,1), length(bags)) + foreach(enumerate(bags)) do (i,b) + if isempty(b) + o[:,i] .= z + else + @inbounds o[:,i] = sum(@view(x[:,b]), dims = 2) + end + end + o +end + +builtin(x, bags, z) ≈ naive(x, bags, z) +@btime builtin(x, bags, z); +@btime naive(x, bags, z); + +cx = CuArray(x); +cz = CuArray(z); +naive(cx, bags, cz); +@btime CUDA.@sync naive(cx, bags, cz); +@btime CUDA.@sync CuArray(builtin(Array(cx), bags, Array(cz))); +``` + +## [Writing own CUDA kernels](@id gpu_lecture_yes_kernel) + +Before diving into details, let's recall some basic from the above HW section: + +* In CUDA programming model, you usually write *kernels*, which represent *body* of a for loop. +* `N` iterations of the loop is divided into *block*s and each block into *warp*s. Single warp consists of 32 threads and these threads are executed simultaneously. All threads in the block are executed in the same SM, having access to the shared memory. +* Each thread executes the kernel for one iteration of the for loop. This means that inside the kernel, you need to determine the index of the iteration. Therefore you will see in kernels statements like + +```julia + i = threadIdx().x + (blockIdx().x - 1) * blockDim().x +``` + +where `threadIdx().x` is the index of the thread within the block, `blockDim().x` is the total number of threads in the block, and `blockIdx().x` is the index of the block within the grid. `x` property suggest that you can partition the execution along three-dimensional cube (three nested for loops), which might be sometimes useful. 
+ +The most trivial example of a kernel is addition as + +```julia +function vadd!(c, a, b, n) + i = threadIdx().x + (blockIdx().x - 1) * blockDim().x + if i <= n + c[i] = a[i] + b[i] + end + return +end + +a = CuArray(Float32.(1:10000)) +b = CuArray(Float32.(2:2:20000)) +c = similar(a) +@cuda threads=1024 blocks=cld(length(a), 1024) vadd!(c, a, b, length(a)) +c +``` + +where + +* we have defined a kernel function `vadd` which looks more or less like normal Julia function, except it returns nothing and it contains identification of an item within loop `i = threadIdx().x + (blockIdx().x - 1) * blockDim().x`. +* we have pre-allocated space to store results for `vadd` in `c` +* `@cuda` is the launch macro, where we have to specify the number of threads per block (the `threads` keyword) and the number of blocks `blocks`. The macro returns an execution context, not the actual value, which is in this case stored in `c`. You can think about the `@cuda` as moral equivalent of parallel loop. +* If `N` is not divisible by `blockDim`, then there will be **always** threads not doing anything, therefore we need to have the `if` statement that we are within bounds. +* The `blockDim` (number of threads in a block) has to be divisible by 32, which is the size of the warp. + +While the `vadd` example is nice, it is trivial and can be achieved by `map` as shown above. A simple operation that is everything but trivial to implement is *reduction*, since it ends up in a single operation. It also allows to demonstrate, why efficient kernels needs to be written at three levels: warp, block, and grid. The exposition below is based on [JuliaCon tutorial on GPU programming](https://github.com/maleadt/juliacon21-gpu_workshop/blob/main/deep_dive/CUDA.ipynb). 
+ +The first naive implementation might looks like + +```julia +function reduce_singlethread(op, a, b) + for i in 1:length(a) + b[] = op(b[], a[i]) + end + return +end + +x = rand(Float32, 1024, 1024) +cx = CuArray(x) +cb = CUDA.zeros(1) +@cuda threads=1 reduce_singlethread(+, cx, cb) +CUDA.@allowscalar cb[] +sum(x) +``` + +and it is pretty terrible, because all the hard work is done by a single thread. The result of the kernel is different from that of `sum` operation. Why is that? This discrepancy is caused by the order of the arithmetic operations, which can be verified by computing the sum as in the kernel as + +```julia +foldl(+, x, init=0f0) +``` + +For the sake of completness, we benchmark the speed of the kernel for comparison later on + +```julia +@benchmark CUDA.@sync @cuda threads=1 reduce_singlethread(+, $(CUDA.rand(1024,1024)), $(CUDA.zeros(1))) +``` + +We can use **atomic** operations to mark that the reduction operation has to be performed exclusively. This have the advantage that we can do some operation while fetching the data, but it is still a very bad idea. + +```julia +function reduce_atomic(op, a, b) + i = threadIdx().x + (blockIdx().x - 1) * blockDim().x + if i <= length(a) + CUDA.@atomic b[] = op(b[], a[i]) + end + return +end + +x = rand(Float32, 1024, 1024) +cx = CuArray(x) +cb = CUDA.zeros(1) +@cuda threads=1024 blocks = cld(length(cx), 1024) reduce_atomic(+, cx, cb) +CUDA.@allowscalar cb[] +sum(x) + +@benchmark CUDA.@sync @cuda threads=1024 blocks=1024 reduce_atomic(+, $(CUDA.rand(1024,1024)), $(CUDA.zeros(1))) +``` + +This solution is better then the single-threadded version, but still very poor. + +Let's take the problem seriously. 
If we want to use parallelism in reduction, we need to perform parallel reduction as shown in the figure below[^2] +![parallel reduction](parallel_reduction.png) + +[^2]: Taken from [url](https://riptutorial.com/cuda/topic/6566/parallel-reduction--e-g--how-to-sum-an-array-) + +The parallel reduction is tricky. **Let's assume that we are allowed to overwrite the first argument a**. This is a relatively safe assumption, since we can always create a copy of `a` before launching the kernel. + +```julia +function reduce_block(op, a, b) + elements = 2* blockDim().x + thread = threadIdx().x + + # parallel reduction of values in a block + d = 1 + while d < elements + sync_threads() + index = 2 * d * (thread-1) + 1 + @inbounds if index <= elements && index+d <= length(a) + @cuprintln "thread $thread: a[$index] + a[$(index+d)] = $(a[index]) + $(a[index+d]) = $(op(a[index], a[index+d]))" + a[index] = op(a[index], a[index+d]) + end + d *= 2 + thread == 1 && @cuprintln() + end + + if thread == 1 + b[] = a[1] + end + + return +end + +a = CuArray(1:16); +b = CuArray([0]); +@cuda threads=cld(length(a),2) reduce_block(+, a, b); +CUDA.@allowscalar b[] +``` + +* The while loop iterates over the levels of the reduction, performing $$2^{\log(\textrm{blockDim}) - d + 1}$$ reductions. +* We need to synchronize threads by `sync_threads`, such that all reductions on the level below are finished +* The output of the reduction will be stored in `a[1]` +* We use `@cuprintln` which allows us to print what is happening inside the thread execution. +* Notice how the number of threads doing some work decreases, which is unfortunately an inevitable consequence of the `reduce` operation. + +To extend the above for multiple blocks, we need to add reduction over blocks.
The idea would be to execute the above loop for each block independently, and then, on the end, the first thread would do the reduction over blocks, as + +```julia +function reduce_grid_atomic(op, a, b) + elements = 2*blockDim().x + offset = 2*(blockIdx().x - 1) * blockDim().x + thread = threadIdx().x + + # parallel reduction of values within the single block + d = 1 + while d < elements + sync_threads() + index = 2 * d * (thread-1) + 1 + @inbounds if index <= elements && index+d+offset <= length(a) + index += offset + a[index] = op(a[index], a[index+d]) + end + d *= 2 + end + + # atomic reduction of this block's value + if thread == 1 + CUDA.@atomic b[] = op(b[], a[offset + 1]) + end + + return +end + +x = rand(Float32, 1024, 1024) +cx = CuArray(x) +cb = CUDA.zeros(1) +@cuda threads=1024 blocks=cld(length(cx), 2*1024) reduce_grid_atomic(+, cx, cb) +CUDA.@allowscalar cb[] +sum(x) +``` + +Recall that each block is executed on a separate SM, each equipped with the local memory. So far, we have been doing all computations in the global memory, which is slow. So how about to copy everything to the local memory and then perform the reduction. This would also have the benefit of not modifying the original arrays. 
+ +```julia +function reduce_grid_localmem(op, a::AbstractArray{T}, b) where {T} + elements = 2*blockDim().x + offset = 2*(blockIdx().x - 1) * blockDim().x + thread = threadIdx().x + + shared = @cuStaticSharedMem(T, (2048,)) + @inbounds shared[thread] = a[offset+thread] + @inbounds shared[thread+blockDim().x] = a[offset+thread+blockDim().x] + + # parallel reduction of values within the single block + d = 1 + while d < elements + sync_threads() + index = 2 * d * (thread-1) + 1 + @inbounds if index <= elements && index+d+offset <= length(a) + index += offset + a[index] = op(a[index], a[index+d]) + end + d *= 2 + end + + # atomic reduction of this block's value to the global accumulator + if thread == 1 + CUDA.@atomic b[] = op(b[], a[offset + 1]) + end + return +end + +x = rand(Float32, 1024, 1024) +cx = CuArray(x) +cb = CUDA.zeros(1) +@cuda threads=1024 blocks=cld(length(cx), 2*1024) reduce_grid_localmem(+, cx, cb) +CUDA.@allowscalar cb[] +sum(x) + +@benchmark CUDA.@sync @cuda threads=1024 blocks=512 reduce_grid_localmem(+, $(CUDA.rand(1024,1024)), $(CUDA.zeros(1))) +``` + +The performance improvement is negligible, but that's because we have a relatively new GPU with lots of global memory bandwith. On older or lower-end GPUs, using shared memory would be valuable. But at least, we are not modifying the original array. + +If we inspect the above kernel in profiler, we can read that it uses 32 registers per thread. But if the SM has 16384 registers, then block of size 1024 will have to share registers, which might lead to poor utilization. 
Changing the blocksize to 512 improves the throughput a bit as can be seen from below + +```julia +@benchmark CUDA.@sync @cuda threads=512 blocks=1024 reduce_grid_localmem(+, $(CUDA.rand(1024,1024)), $(CUDA.zeros(1))) +``` + +The above optimizations are not by any means specific to CUDA; they can be applied to all GPU-type accelerators, on AMD cards with `AMDGPU.jl` and others (Intel) with `oneAPI.jl`, or you can try to write a backend-agnostic kernel using `KernelAbstractions.jl`. + +The performant versions of reduction described above always perform the reduction on two levels: at the block level and on the grid level. The block level takes advantage of all threads being executed on a single SM, which means we can synchronize threads and we have access to local memory. The reduction on the grid level of the results of blocks is a little sloppy, as we rely on atomic addition. This might not be a bad decision, since blocks are unlikely to finish at the same time. + +* Within a *Warp*, we can use atomic instructions which allow access to values in other threads' registers. https://developer.nvidia.com/blog/using-cuda-warp-level-primitives/ + +But as we have said above, a group of 32 threads executed under a single warp has fast access to each other's registers. To exploit this, we would need to write the reduction on the level of warp, block, and grid. The following code is adapted from `CUDA.jl` and it uses a trick of having a `neutral` element for the reduction operator, which is zero for addition, one for multiplication, minus infinity for maximum, etc. This neutral element simplifies "padding", as we simply replace a missing value with the neutral item. + +The rationale behind the code is the following. Each thread will load a value to be reduced from memory to a register --- a variable called `val` below. This means that, unlike above, the number of threads is equal to the number of elements to reduce. 
+ +First, groups of 32 threads perform their reduction in the function `demo_reduce_warp`, where the function `shfl_down_sync` copies a value from the register holding the variable `val` from a thread at `offset` distance. The first argument `0xffffffff` is a mask indicating which threads contain valid values. Since we load the `neutral` element into `val`, all threads participating in the computation are valid. As above, we perform five iterations until all 32 values are reduced and stored in the `val` of the first thread of the warp. + +Second, on the block level, the first thread of each warp stores the reduced value `val` to local memory, and from there each thread of the first warp of that block loads this reduced value into `val`. If there are fewer warps in the block, we load `neutral` instead. After this, the first warp contains the results of the reduction of each warp in the block, and we can call the reduction on the first warp to reduce the data, effectively reducing the block. + +Third, values across blocks are reduced using the `@atomic` approach as before. 
+ +```julia +@inline function demo_reduce_warp(op, val) + # assume(warpsize() == 32) + offset = 0x00000001 + while offset < warpsize() + val = op(val, shfl_down_sync(0xffffffff, val, offset)) + offset <<= 1 + end + return val +end + +@inline function demo_reduce_block(op, val::T, neutral) where T + # shared mem for partial sums + # assume(warpsize() == 32) + shared = CuStaticSharedArray(T, 32) + + wid, lane = fldmod1(threadIdx().x, warpsize()) + + # each warp performs partial reduction + val = demo_reduce_warp(op, val) + + # write reduced value to shared memory + if lane == 1 + @inbounds shared[wid] = val + end + + # wait for all partial reductions + sync_threads() + + # read from shared memory only if that warp existed + val = if threadIdx().x <= fld1(blockDim().x, warpsize()) + @inbounds shared[lane] + else + neutral + end + + # final reduce within first warp + if wid == 1 + val = demo_reduce_warp(op, val) + end + return val +end + +function reduce_warp(op, a, b, neutral) + index = (blockIdx().x - 1) * blockDim().x + threadIdx().x + val = index <= length(a) ? 
a[index] : neutral + + val = demo_reduce_block(op, val, neutral) + + if threadIdx().x == 1 + CUDA.@atomic b[] += val + end + + return +end + +x = rand(Float32, 1024, 1024) +cx = CuArray(x) +cb = CUDA.zeros(1) +CUDA.@sync @cuda threads=512 blocks=2048 reduce_warp(+, cx, cb, 0f0) +CUDA.@allowscalar cb[] +sum(x) + +@benchmark CUDA.@sync @cuda threads=512 blocks=2048 reduce_warp(+, $(CUDA.rand(1024,1024)), $(CUDA.zeros(1)), 0f0) +``` + +This approach improves the our previous best by 21%, which is not bad at all + +Let's now compare different versions and tabulate the results + +```julia +@benchmark CUDA.@sync @cuda threads=1 reduce_singlethread(+, $(CUDA.rand(1024,1024)), $(CUDA.zeros(1))) +@benchmark CUDA.@sync @cuda threads=1024 blocks=1024 reduce_atomic(+, $(CUDA.rand(1024,1024)), $(CUDA.zeros(1))) +@benchmark CUDA.@sync @cuda threads=1024 blocks=512 reduce_grid_atomic(+, $(CUDA.rand(1024,1024)), $(CUDA.zeros(1))) +@benchmark CUDA.@sync @cuda threads=1024 blocks=512 reduce_grid_localmem(+, $(CUDA.rand(1024,1024)), $(CUDA.zeros(1))) +@benchmark CUDA.@sync @cuda threads=512 blocks=2048 reduce_warp(+, $(CUDA.rand(1024,1024)), $(CUDA.zeros(1)), 0f0) +@benchmark sum($(CUDA.rand(1024,1024))) +@benchmark sum($(rand(Float32, 1024,1024))) +``` + +| kernel version | min time | +|:-----------------------------------------------------|:-----------:| +| single thread | 56.399 ms | +| multiple threads with atomic reduction | 1.772 ms | +| parallel reduction | 33.381 μs | +| parallel reduction with local mem | 34.261 μs | +| parallel reduction with warps | 26.890 μs | +| default sum on GPU | 31.960 μs | +| default sum on CPU | 82.391 μs | + + +What we have missed to optimize: + +* tune the launch configuration (for `reduce_warp` 128 threads seems mildly better with 25.790 μs) +* avoid shared memory bank conflicts +* analyse the access pattern to ensure all memory accesses are coalesced + +The card we use GeForce RTX 2080 Ti which has a peak memory bandwidth of `616.0 GB/s`. 
If we take our best launch configuration with 128 threads, our throughput is `32*2^20 / (25 * 10^-6) ≈ 1.342 TB/s` which seems like we are like twice over the theoretical memory limit, which is weird + +## How Julia compiles CUDA kernels + +`CudaNative.jl` (predecessor of `CUDA.jl`)[^bessard18] was a seminal work as it was the first demonstrating Julia producing a native code for a different platform (backend) than that of the main processor. Specifically, while Julia is producing code for x86 (or Arm), `CUDA.jl` makes her to compile for PTX ISA of NVidia GPUs. `CudaNative.jl` has created a flexible toolchain, which was later generalized for backends, namely for AMD and Intel accelerators, and also for Berkeley packet filter[^bpf]. + +An important but rarely approaciated fact is that `CUDA.jl` makes the development of kernels interactive, which speeds up the development a lot. + +The following notes are based on [^bessard18] and on inspection of `CUDA.jl` version 3.6.2. The birds eye view on the toolchain is as follows: +![cuda native toolchain](cudanative.png) + +When we wish to launch kernel using `@cuda (config...) function(args...)`, the julia (roughly) performs following steps. + +1. If the kernel is already compiled to PTX, go to step 4 +2. Use Julia's compiler to produce LLVM code for a given kernel using `@code_llvm`. This is the **front-end** part and its result is LLVM code in textual form (Julia's interface with its runtime part is textual). +3. Take the LLVM code, perform optimization passes and adjustments, and compile the code of the kernel for CUDA backend. This is the **back-end** part. +4. Use a runtime library provided by NVidia's CUDA to launch the compiled kernel. + +### Caveats of the front-end part + +* Authors state that for the integration, they require access to AST, lowered IR code, and to LLVM IR. 
+ +* For seamless integration, the above is not sufficient, as the generated LLVM code will contain calls to the Julia runtime, such as exception handling, garbage collection, and dynamic allocation of memory. The authors introduced configuration parameters and hooks into type inference and code generation in Julia v0.6, which allow lowering code without these calls (the other considered option, removing these calls in post-processing, would have led to a messy, fragile, and error-prone implementation). +* The above hooks allow reusing most of Julia's code generation. There is a lot of functionality you get for free, like parsing, macro-expansion, and optimization passes. + +### Caveats of the back-end part + +* `@cuda (config...) function(args...)` is the main gateway to launch cuda kernels. It compiles the kernel `function` if needed and launches it on the GPU device with `config`. +* The LLVM code produced in the front-end part is compiled using the LLVM NVPTX compiler, which is accessed through a wrapper `LLVM.jl`. Funny enough, `LLVM.jl` just exposes C-functions of the LLVM compiler shipped with Julia. +* The fact that LLVM can generate PTX code avoids shipping another compiler. +* Before the LLVM code is passed to the LLVM compiler, certain optimizations are performed. For example, immutable parameters are always passed by value instead of by reference. 
+ +## Sources + +* [SIMD < SIMT < SMT: parallelism in NVIDIA GPUs](http://yosefk.com/blog/simd-simt-smt-parallelism-in-nvidia-gpus.html) +* [Blog by Mike Innes on GPU programming and gradients for free](http://mikeinnes.github.io/2017/08/24/cudanative.html) +* [Tutorial on GPU programming at JuliaCon 2021](https://www.youtube.com/watch?v=Hz9IMJuW5hU) +* [Materials for tutorial on GPU programming at JuliaCon 2021](https://github.com/maleadt/juliacon21-gpu_workshop/blob/main/deep_dive/CUDA.ipynb) +* [Using CUDA Warp-Level Primitives](https://developer.nvidia.com/blog/using-cuda-warp-level-primitives/) +* https://juliagpu.org/post/2020-11-05-oneapi_0.1/ +* https://www.youtube.com/watch?v=aKRv-W9Eg8g + +[^bpf]: https://ebpf.io/ +[^bessard18]: Besard, Tim, Christophe Foket, and Bjorn De Sutter. "Effective extensible programming: unleashing Julia on GPUs." IEEE Transactions on Parallel and Distributed Systems 30.4 (2018): 827-841. \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_11/leftover.md b/docs_vitepress/src/lectures/lecture_11/leftover.md new file mode 100644 index 00000000..25ecf7b1 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_11/leftover.md @@ -0,0 +1,78 @@ + +```julia +cx = CUDA.rand(1024,1024) +cb = CUDA.zeros(1) +@macroexpand @cuda threads=128 blocks=8192 reduce_warp(+, cx, cb, 0f0) + +quote + var"##415" = (+) + var"##416" = CUDA.rand(1024, 1024) + var"##417" = CUDA.zeros(1) + var"##418" = 0.0f0 + begin + var"##f_var#419" = reduce_warp + $(Expr(:gc_preserve, quote + local var"##kernel_f#420" = (CUDA.cudaconvert)(var"##f_var#419") + local var"##kernel_args#421" = map(CUDA.cudaconvert, (var"##415", var"##416", var"##417", var"##418")) + local var"##kernel_tt#422" = Tuple{map(Core.Typeof, var"##kernel_args#421")...} + local var"##kernel#423" = (CUDA.cufunction)(var"##kernel_f#420", var"##kernel_tt#422"; ) + if true + var"##kernel#423"(var"##415", var"##416", var"##417", var"##418"; $(Expr(:(=), :threads, 128)), $(Expr(:(=), 
:blocks, 8192))) + end + var"##kernel#423" + end, Symbol("##415"), Symbol("##416"), Symbol("##417"), Symbol("##418"), Symbol("##f_var#419"))) + end +end +``` + +prepares arguments to compile the kernel, which is done in the function `CUDA.cufunction` and we can prepare it as + +```julia +f = CUDA.cudaconvert(reduce_warp) +cuparams = CUDA.cudaconvert((+, cx, cb, 0f0)) +tt = Tuple{map(Core.Typeof, cuparams)...} +kernel_struct = CUDA.cufunction(f, tt) +``` + +Diving into the `CUDA.cufunction` at `CUDA.jl/src/compiler/execution.jl:290` we observe that it prepares the compilation job and send it to the GPU compiler, which either compiles it or fetch it from the the cache. + +```julia +function cufunction(f::F, tt::TT=Tuple{}; name=nothing, kwargs...) where {F,TT} + cuda = active_state() + cache = cufunction_cache(cuda.context) + source = FunctionSpec(f, tt, true, name) + target = CUDACompilerTarget(cuda.device; kwargs...) + params = CUDACompilerParams() + job = CompilerJob(target, source, params) + return GPUCompiler.cached_compilation(cache, job, + cufunction_compile, + cufunction_link)::HostKernel{F,tt} +end +``` + +Let's now dive into the compilation part. 
We can get a sense using `Cthulhu.jl`, but sometimes it is good to do this by hand + +```julia +using CUDA: active_state, cufunction_cache, FunctionSpec, CUDACompilerTarget, CUDACompilerParams, cufunction_compile, cufunction_link +cuda = active_state() +cache = cufunction_cache(cuda.context) +source = FunctionSpec(f, tt, true, nothing) +target = CUDACompilerTarget(cuda.device) +params = CUDACompilerParams() +job = CompilerJob(target, source, params) +GPUCompiler.cached_compilation(cache, job, cufunction_compile, cufunction_link) +``` + +* `FunctionSpec` is just a struct containing information about a function that will be compiled +* `CompilerJob` is a structure containing all important information for the compilation of the kernel +* `GPUCompiler.cached_compilation` is a cache, which either fetches the kernel from the cache or forces the compilation. Let's look at the compilation + +Lowering to PTX takes place in + +```julia +# lower to PTX +mi, mi_meta = GPUCompiler.emit_julia(job) +ir, ir_meta = GPUCompiler.emit_llvm(job, mi) +asm, asm_meta = GPUCompiler.emit_asm(job, ir; + format=LLVM.API.LLVMAssemblyFile) +``` diff --git a/docs_vitepress/src/lectures/lecture_11/nvidia-gpu.jpg b/docs_vitepress/src/lectures/lecture_11/nvidia-gpu.jpg new file mode 100644 index 00000000..41e13205 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_11/nvidia-gpu.jpg differ diff --git a/docs_vitepress/src/lectures/lecture_11/nvidia-kepler.jpg b/docs_vitepress/src/lectures/lecture_11/nvidia-kepler.jpg new file mode 100644 index 00000000..3189e606 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_11/nvidia-kepler.jpg differ diff --git a/docs_vitepress/src/lectures/lecture_11/parallel_reduction.png b/docs_vitepress/src/lectures/lecture_11/parallel_reduction.png new file mode 100644 index 00000000..502cf46c Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_11/parallel_reduction.png differ diff --git a/docs_vitepress/src/lectures/lecture_11/profile_nn.jl 
b/docs_vitepress/src/lectures/lecture_11/profile_nn.jl new file mode 100644 index 00000000..0666d3ff --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_11/profile_nn.jl @@ -0,0 +1,40 @@ +using CUDA + +# define a dense layer +struct Dense{W<:AbstractArray,B<:AbstractArray,F} + w::W + b::B + f::F +end + +function Dense(idim, odim, f = identity) + Dense(randn(Float32, odim, idim), randn(Float32, odim), f) +end + +function (l::Dense)(x) + l.f.(l.w * x .+ l.b) +end + +#define moving of data to CPU +gpu(x::AbstractArray) = CuArray(x) +cpu(x::CuArray) = Array(x) +gpu(l::Dense) = Dense(gpu(l.w), gpu(l.b), l.f) +gpu(l::ComposedFunction) = gpu(l.outer) ∘ gpu(l.inner) + +# a simple but powerful non-linearity +relu(x::T) where {T<:Number} = max(x, zero(T)) + + +# Let's now define a small one hidden layer neural network +x = randn(Float32, 16, 100) +l₁ = Dense(16,32, relu) +l₂ = Dense(32,8) +nn = l₂ ∘ l₁ + +# and try to profile a computation +CUDA.@profile CUDA.@sync begin + NVTX.@range "moving nn to gpu" gpu_nn = gpu(nn) + NVTX.@range "moving x to gpu" gpu_x = gpu(x) + NVTX.@range "nn(x)" o = gpu_nn(gpu_x) + NVTX.@range "moving results to cpu" cpu(o) +end diff --git a/docs_vitepress/src/lectures/lecture_11/simt-divergence.png b/docs_vitepress/src/lectures/lecture_11/simt-divergence.png new file mode 100644 index 00000000..26ee2e99 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_11/simt-divergence.png differ diff --git a/docs_vitepress/src/lectures/lecture_11/skylake_core_die.png b/docs_vitepress/src/lectures/lecture_11/skylake_core_die.png new file mode 100644 index 00000000..d322c3dd Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_11/skylake_core_die.png differ diff --git a/docs_vitepress/src/lectures/lecture_11/test.jl b/docs_vitepress/src/lectures/lecture_11/test.jl new file mode 100644 index 00000000..091d937e --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_11/test.jl @@ -0,0 +1,41 @@ +using KernelAbstractions + +# Simple 
kernel for matrix multiplication +@kernel function matmul_kernel!(a, b, c) + i, j = @index(Global, NTuple) + + # creating a temporary sum variable for matrix multiplication + tmp_sum = zero(eltype(c)) + for k = 1:size(a)[2] + tmp_sum += a[i,k] * b[k, j] + end + + c[i,j] = tmp_sum +end + +# Creating a wrapper kernel for launching with error checks +function matmul!(a, b, c) + if size(a)[2] != size(b)[1] + println("Matrix size mismatch!") + return nothing + end + backend = KernelAbstractions.get_backend(a) + kernel! = matmul_kernel!(backend) + kernel!(a, b, c, ndrange=size(c)) +end + +using Metal +a = rand(Float32, 10000, 10000) +b = rand(Float32, 10000, 10000) +ag = a |> MtlArray +bg = b |> MtlArray +c = similar(ag) +matmul!(ag,bg,c) + +@assert a*b ≈ Matrix(c) + +@btime $a*$b +@btime Metal.@sync matmul!($ag, $bg, $c) +# KernelAbstractions.synchronize(backend) + +# @test isapprox(c, a*b) diff --git a/docs_vitepress/src/lectures/lecture_11/thread_index.png b/docs_vitepress/src/lectures/lecture_11/thread_index.png new file mode 100644 index 00000000..3274eb22 Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_11/thread_index.png differ diff --git a/docs_vitepress/src/lectures/lecture_12/LV_GaussNum.svg b/docs_vitepress/src/lectures/lecture_12/LV_GaussNum.svg new file mode 100644 index 00000000..26fa7847 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_12/LV_GaussNum.svg @@ -0,0 +1,466 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/docs_vitepress/src/lectures/lecture_12/LV_GaussNum2.svg b/docs_vitepress/src/lectures/lecture_12/LV_GaussNum2.svg new file mode 100644 index 00000000..498d80a6 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_12/LV_GaussNum2.svg @@ -0,0 +1,454 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_12/LV_Measurements.svg b/docs_vitepress/src/lectures/lecture_12/LV_Measurements.svg new file mode 100644 index 00000000..9a879a44 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_12/LV_Measurements.svg @@ -0,0 +1,454 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_12/LV_Measurements2.svg b/docs_vitepress/src/lectures/lecture_12/LV_Measurements2.svg new file mode 100644 index 00000000..0928aeee --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_12/LV_Measurements2.svg @@ -0,0 +1,454 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_12/LV_Quadrics.svg b/docs_vitepress/src/lectures/lecture_12/LV_Quadrics.svg new file mode 100644 index 00000000..1d009506 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_12/LV_Quadrics.svg @@ -0,0 +1,460 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_12/LV_ensemble.svg b/docs_vitepress/src/lectures/lecture_12/LV_ensemble.svg new file mode 100644 index 00000000..8493de37 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_12/LV_ensemble.svg @@ -0,0 +1,2368 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_12/cubature.png b/docs_vitepress/src/lectures/lecture_12/cubature.png new file mode 100644 index 00000000..b1793dcb Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_12/cubature.png differ diff --git a/docs_vitepress/src/lectures/lecture_12/euler.jpg b/docs_vitepress/src/lectures/lecture_12/euler.jpg new file mode 100644 index 00000000..0c20ec1f Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_12/euler.jpg differ diff 
--git a/docs_vitepress/src/lectures/lecture_12/hw.md b/docs_vitepress/src/lectures/lecture_12/hw.md new file mode 100644 index 00000000..ef84c689 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_12/hw.md @@ -0,0 +1,136 @@ +# [Homework 12 - The Runge-Kutta ODE Solver](@id hw12) + +There exist many different ODE solvers. To demonstrate how we can get +significantly better results with a simple update to `Euler`, you will +implement the second order Runge-Kutta method `RK2`: + +```math +\begin{align*} +\tilde x_{n+1} &= x_n + hf(x_n, t_n)\\ + x_{n+1} &= x_n + \frac{h}{2}(f(x_n,t_n)+f(\tilde x_{n+1},t_{n+1})) +\end{align*} +``` + +`RK2` is a 2nd order method. It uses not only $f$ (the slope at a given point), +but also $f'$ (the derivative of the slope). With some clever manipulations you +can arrive at the equations above, which make use of $f'$ without needing an +explicit expression for it (if you want to know how, see +[here](https://web.mit.edu/10.001/Web/Course_Notes/Differential_Equations_Notes/node5.html)). +Essentially, `RK2` computes an initial guess $\tilde x_{n+1}$ to then average +the slopes at the current point $x_n$ and at the guess $\tilde x_{n+1}$, which +is illustrated below. +![rk2](rk2.png) + +The code from the lab that you will need for this homework is given below. +As always, put all your code in a file called `hw.jl`, zip it, and upload it +to BRUTE. 
+ +```@example hw +struct ODEProblem{F,T<:Tuple{Number,Number},U<:AbstractVector,P<:AbstractVector} + f::F + tspan::T + u0::U + θ::P +end + +abstract type ODESolver end + +struct Euler{T} <: ODESolver + dt::T +end + +function (solver::Euler)(prob::ODEProblem, u, t) + f, θ, dt = prob.f, prob.θ, solver.dt + (u + dt*f(u,θ), t+dt) +end + +function solve(prob::ODEProblem, solver::ODESolver) + t = prob.tspan[1]; u = prob.u0 + us = [u]; ts = [t] + while t < prob.tspan[2] + (u,t) = solver(prob, u, t) + push!(us,u) + push!(ts,t) + end + ts, reduce(hcat,us) +end + +# Define & Solve ODE + +function lotkavolterra(x,θ) + α, β, γ, δ = θ + x₁, x₂ = x + + dx₁ = α*x₁ - β*x₁*x₂ + dx₂ = δ*x₁*x₂ - γ*x₂ + + [dx₁, dx₂] +end +nothing # hide +``` + +::: danger Homework (2 points) + +Implement the 2nd order Runge-Kutta solver according to the equations given above +by overloading the call method of a new type `RK2`. + +```julia +(solver::RK2)(prob::ODEProblem, u, t) +``` + +::: + +```@setup hw +struct RK2{T} <: ODESolver + dt::T +end + +function (solver::RK2)(prob::ODEProblem, u, t) + f, θ, dt = prob.f, prob.θ, solver.dt + du = f(u,θ) + uh = u + du*dt + u + dt/2*(du + f(uh,θ)), t+dt +end +``` + +You should be able to use it exactly like our `Euler` solver before: + +```@example hw +using Plots +using JLD2 + +# Define ODE +function lotkavolterra(x,θ) + α, β, γ, δ = θ + x₁, x₂ = x + + dx₁ = α*x₁ - β*x₁*x₂ + dx₂ = δ*x₁*x₂ - γ*x₂ + + [dx₁, dx₂] +end + +θ = [0.1,0.2,0.3,0.2] +u0 = [1.0,1.0] +tspan = (0.,100.) 
+prob = ODEProblem(lotkavolterra,tspan,u0,θ) + +# load correct data +true_data = load("lotkadata.jld2") + +# create plot +p1 = plot(true_data["t"], true_data["u"][1,:], lw=4, ls=:dash, alpha=0.7, + color=:gray, label="x Truth") +plot!(p1, true_data["t"], true_data["u"][2,:], lw=4, ls=:dash, alpha=0.7, + color=:gray, label="y Truth") + +# Euler solve +(t,X) = solve(prob, Euler(0.2)) +plot!(p1,t,X[1,:], color=3, lw=3, alpha=0.8, label="x Euler", ls=:dot) +plot!(p1,t,X[2,:], color=4, lw=3, alpha=0.8, label="y Euler", ls=:dot) + +# RK2 solve +(t,X) = solve(prob, RK2(0.2)) +plot!(p1,t,X[1,:], color=1, lw=3, alpha=0.8, label="x RK2") +plot!(p1,t,X[2,:], color=2, lw=3, alpha=0.8, label="y RK2") +``` \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_12/lab-ode.jl b/docs_vitepress/src/lectures/lecture_12/lab-ode.jl new file mode 100644 index 00000000..44617a12 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_12/lab-ode.jl @@ -0,0 +1,49 @@ +struct ODEProblem{F,T<:Tuple{Number,Number},U<:AbstractVector,P<:AbstractVector} + f::F + tspan::T + u0::U + θ::P +end + + +abstract type ODESolver end + +struct Euler{T} <: ODESolver + dt::T +end + +function (solver::Euler)(prob::ODEProblem, u, t) + f, θ, dt = prob.f, prob.θ, solver.dt + (u + dt*f(u,θ), t+dt) +end + + +function solve(prob::ODEProblem, solver::ODESolver) + t = prob.tspan[1]; u = prob.u0 + us = [u]; ts = [t] + while t < prob.tspan[2] + (u,t) = solver(prob, u, t) + push!(us,u) + push!(ts,t) + end + ts, reduce(hcat,us) +end + + +# Define & Solve ODE + +function lotkavolterra(x,θ) + α, β, γ, δ = θ + x₁, x₂ = x + + dx₁ = α*x₁ - β*x₁*x₂ + dx₂ = δ*x₁*x₂ - γ*x₂ + + [dx₁, dx₂] +end + +θ = [0.1,0.2,0.3,0.2] +u0 = [1.0,1.0] +tspan = (0.,100.) 
+prob = ODEProblem(lotkavolterra,tspan,u0,θ) + diff --git a/docs_vitepress/src/lectures/lecture_12/lab.jl b/docs_vitepress/src/lectures/lecture_12/lab.jl new file mode 100644 index 00000000..43e19056 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_12/lab.jl @@ -0,0 +1,182 @@ +using Zygote + +struct GaussNum{T<:Real} <: Real + μ::T + σ::T +end +mu(x::GaussNum) = x.μ +sig(x::GaussNum) = x.σ +GaussNum(x,y) = GaussNum(promote(x,y)...) +±(x,y) = GaussNum(x,y) +Base.convert(::Type{T}, x::T) where T<:GaussNum = x +Base.convert(::Type{GaussNum{T}}, x::Number) where T = GaussNum(x,zero(T)) +Base.promote_rule(::Type{GaussNum{T}}, ::Type{S}) where {T,S} = GaussNum{T} +Base.promote_rule(::Type{GaussNum{T}}, ::Type{GaussNum{T}}) where T = GaussNum{T} + +# convert(GaussNum{Float64}, 1.0) |> display +# promote(GaussNum(1.0,1.0), 2.0) |> display +# error() + +#+(x::GaussNum{T},a::T) where T =GaussNum(x.μ+a,x.σ) +#+(a::T,x::GaussNum{T}) where T =GaussNum(x.μ+a,x.σ) +#-(x::GaussNum{T},a::T) where T =GaussNum(x.μ-a,x.σ) +#-(a::T,x::GaussNum{T}) where T =GaussNum(x.μ-a,x.σ) +#*(x::GaussNum{T},a::T) where T =GaussNum(x.μ*a,a*x.σ) +#*(a::T,x::GaussNum{T}) where T =GaussNum(x.μ*a,a*x.σ) + + +# function Base.:*(x1::GaussNum, x2::GaussNum) +# f(x1,x2) = x1 * x2 +# s1 = Zygote.gradient(μ -> f(μ,x2.μ), x1.μ)[1]^2 * x1.σ^2 +# s2 = Zygote.gradient(μ -> f(x1.μ,μ), x2.μ)[1]^2 * x2.σ^2 +# GaussNum(f(x1.μ,x2.μ), sqrt(s1+s2)) +# end + +function _uncertain(f, args::GaussNum...) + μs = [x.μ for x in args] + dfs = Zygote.gradient(f,μs...) 
+ σ = map(zip(dfs,args)) do (df,x) + df^2 * x.σ^2 + end |> sum |> sqrt + GaussNum(f(μs...), σ) +end + +function _uncertain(expr::Expr) + if expr.head == :call + :(_uncertain($(expr.args[1]), $(expr.args[2:end]...))) + else + error("Expression has to be a :call") + end +end + +macro uncertain(expr) + _uncertain(expr) +end + +getmodule(f) = first(methods(f)).module + +function _register(func::Symbol) + mod = getmodule(eval(func)) + :($(mod).$(func)(args::GaussNum...) = _uncertain($func, args...)) +end + +function _register(funcs::Expr) + Expr(:block, map(_register, funcs.args)...) +end + +macro register(funcs) + _register(funcs) +end + +@register - + * + +f(x,y) = x+y*x + + +# @register * +# @register + +# @register - +# @register f + +asdf(x1::GaussNum{T},x2::GaussNum{T}) where T =GaussNum(x1.μ*x2.μ, sqrt((x2.μ*x1.σ).^2 + (x1.μ * x2.σ).^2)) +gggg(x1::GaussNum{T},x2::GaussNum{T}) where T =GaussNum(x1.μ+x2.μ, sqrt(x1.σ.^2 + x2.σ.^2)) + +x1 = GaussNum(rand(),rand()) +x2 = GaussNum(rand(),rand()) + +display(x1*x2) +display(asdf(x1,x2)) +display(_uncertain(*,x1,x2)) +display(@uncertain x1*x2) + +display(x1-x2) +display(x1+x2) +display(f(x1,x2)) +#error() + + +using Plots +using JLD2 + +abstract type AbstractODEProblem end + +struct ODEProblem{F,T,U,P} <: AbstractODEProblem + f::F + tspan::T + u0::U + θ::P +end + +abstract type ODESolver end +struct Euler{T} <: ODESolver + dt::T +end +struct RK2{T} <: ODESolver + dt::T +end + +function f(x,θ) + α, β, γ, δ = θ + x₁, x₂ = x + + dx₁ = α*x₁ - β*x₁*x₂ + dx₂ = δ*x₁*x₂ - γ*x₂ + + [dx₁, dx₂] +end + +function solve(prob::AbstractODEProblem, solver::ODESolver) + t = prob.tspan[1]; u = prob.u0 + us = [u]; ts = [t] + while t < prob.tspan[2] + (u,t) = solver(prob, u, t) + push!(us,u) + push!(ts,t) + end + ts, reduce(hcat,us) +end + +function (solver::Euler)(prob::ODEProblem, u, t) + f, θ, dt = prob.f, prob.θ, solver.dt + (u + dt*f(u,θ), t+dt) +end + +function (solver::RK2)(prob::ODEProblem, u, t) + f, θ, dt = prob.f, prob.θ, solver.dt 
+ uh = u + f(u,θ)*dt + u + dt/2*(f(u,θ) + f(uh,θ)), t+dt +end + + +@recipe function plot(ts::AbstractVector, xs::AbstractVector{<:GaussNum}) + # you can set a default value for an attribute with `-->` + # and force an argument with `:=` + μs = [x.μ for x in xs] + σs = [x.σ for x in xs] + @series begin + :seriestype := :path + # ignore series in legend and color cycling + primary := false + linecolor := nothing + fillcolor := :lightgray + fillalpha := 0.5 + fillrange := μs .- σs + # ensure no markers are shown for the error band + markershape := :none + # return series data + ts, μs .+ σs + end + ts, μs +end + +θ = [0.1,0.2,0.3,0.2] +u0 = [GaussNum(1.0,0.1),GaussNum(1.0,0.1)] +tspan = (0.,100.) +dt = 0.1 +prob = ODEProblem(f,tspan,u0,θ) + +t,X=solve(prob, RK2(0.2)) +p1 = plot(t, X[1,:], label="x", lw=3) +plot!(p1, t, X[2,:], label="y", lw=3) + +display(p1) diff --git a/docs_vitepress/src/lectures/lecture_12/lab.md b/docs_vitepress/src/lectures/lecture_12/lab.md new file mode 100644 index 00000000..4a076bf3 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_12/lab.md @@ -0,0 +1,427 @@ +# [Lab 12 - Differential Equations](@id lab12) + +In this lab you will implement a simple solver for *ordinary differential +equations* (ODE) as well as a less verbose version of the `GaussNum`s that were +introduced in the lecture. + +## Euler ODE Solver + +In this first part you will implement your own, simple, ODE framwork (feel free +to make it a package;) in which you can easily specify different ODE solvers. +The API is heavily inspired by [`DifferentialEquations.jl`](https://diffeq.sciml.ai/stable/), +so if you ever need to use it, you will already have a feeling for how it works. + +Like in the lecture, we want to be able to specify an ODE like below. 
+ +```@example lab +function lotkavolterra(x,θ) + α, β, γ, δ = θ + x₁, x₂ = x + + dx₁ = α*x₁ - β*x₁*x₂ + dx₂ = δ*x₁*x₂ - γ*x₂ + + [dx₁, dx₂] +end +nothing # hide +``` + +In the lecture we then solved it with a `solve` function that received all necessary +arguments to fully specify how the ODE should be solved. The number of necessary arguments +to `solve` can quickly become very large, so we will introduce a new API for `solve` +which will always take only two arguments: `solve(::ODEProblem, ::ODESolver)`. +The `solve` function will only do some book-keeping and call the solver until +the ODE is solved for the full `tspan`. + +The `ODEProblem` will contain all necessary parameters to fully specify the ODE +that should be solved. In our case that is the function `f` that defines the +ODE itself, initial conditions `u0`, ODE parameters `θ`, and the time domain of +the ODE `tspan`: + +```@example lab +struct ODEProblem{F,T<:Tuple{Number,Number},U<:AbstractVector,P<:AbstractVector} + f::F + tspan::T + u0::U + θ::P +end +``` + +The solvers will all be subtyping the abstract type `ODESolver`. The `Euler` solver +from the lecture will need one field `dt` which specifies its time step: + +```@example lab +abstract type ODESolver end + +struct Euler{T} <: ODESolver + dt::T +end +``` + +::: warning Exercise + +Overload the call-method of `Euler` + +```julia +(solver::Euler)(prob::ODEProblem, u, t) +``` + +such that calling the solver with an `ODEProblem` will perform one step of the +Euler solver and return updated ODE varialbes `u1` and the corresponding +timestep `t1`. + +::: + +::: details Show solution + +```@example lab +function (solver::Euler)(prob::ODEProblem, u, t) + f, θ, dt = prob.f, prob.θ, solver.dt + (u + dt*f(u,θ), t+dt) +end +``` + +::: + +```@example lab +# define ODEProblem +θ = [0.1,0.2,0.3,0.2] +u0 = [1.0,1.0] +tspan = (0.,100.) +prob = ODEProblem(lotkavolterra,tspan,u0,θ) + +# run one solver step +solver = Euler(0.2) +(u1,t1) = solver(prob,u0,0.) 
+``` + +::: warning Exercise + +Implement the function `solve(::ODEProblem,::ODESolver)` which calls the solver +as many times as are necessary to solve the ODE for the full time domain. +`solve` should return a vector of timesteps and a corresponding matrix of +variables. + +::: + +::: details Show solution + +```@example lab +function solve(prob::ODEProblem, solver::ODESolver) + t = prob.tspan[1]; u = prob.u0 + us = [u]; ts = [t] + while t < prob.tspan[2] + (u,t) = solver(prob, u, t) + push!(us,u) + push!(ts,t) + end + ts, reduce(hcat,us) +end +nothing # hide +``` + +::: + +You can load the true solution and compare it in a plot like below. The file +that contains the correct solution is located here: +[`lotkadata.jld2`](https://github.com/JuliaTeachingCTU/Scientific-Programming-in-Julia/blob/master/docs/src/lecture_12/lotkadata.jld2). + +```@example lab +using JLD2 +using Plots + +true_data = load("lotkadata.jld2") + +p1 = plot(true_data["t"], true_data["u"][1,:], lw=4, ls=:dash, alpha=0.7, color=:gray, label="x Truth") +plot!(p1, true_data["t"], true_data["u"][2,:], lw=4, ls=:dash, alpha=0.7, color=:gray, label="y Truth") + +(t,X) = solve(prob, Euler(0.2)) + +plot!(p1,t,X[1,:], color=1, lw=3, alpha=0.8, label="x Euler") +plot!(p1,t,X[2,:], color=2, lw=3, alpha=0.8, label="y Euler") +``` + +As you can see in the plot above, the Euler method quickly becomes quite +inaccurate because we make a step in the direction of the tangent which inevitably +leads us away from the perfect solution as shown in the plot below. +![euler](euler.jpg) + +In the [homework](@ref hw12) you will implement a Runge-Kutta solver to get a +much better accuracy with the same step size. + +## Automating `GaussNum`s + +Next you will implement your own uncertainty propagation. 
In the lecture you +have already seen the new number type that we need for this: + +```@example lab +struct GaussNum{T<:Real} <: Real + μ::T + σ::T +end +``` + +::: warning Exercise (tiny) + +Overload the `±` (type: `\pm`) symbol to define `GaussNum`s like this: `2.0 ± 1.0`. +Additionally, overload the `show` function such that `GaussNum`s are printed +with the `±` as well. + +::: + +::: details Show solution + +```@example lab +±(x,y) = GaussNum(x,y) +Base.show(io::IO, x::GaussNum) = print(io, "$(x.μ) ± $(x.σ)") +``` + +::: + +Recall, that for a function $f(\bm x)$ with $N$ inputs, the uncertainty $\sigma_f$ +is defined by + +```math +\sigma_f = \sqrt{\sum_{i=1}^N \left( \frac{df}{dx_i}\sigma_i \right)^2} +``` + +To make `GaussNum`s work for arithmetic operations we could +manually implement all desired functions as we started doing in the lecture. +With the autodiff package `Zygote` we can automate the generation of these +functions. In the next two exercises you will implement a macro `@register` +that takes a function and defines the corresponding uncertainty propagation +rule according to the equation above. + +::: warning Exercise + +Implement a helper function `uncertain(f, args::GaussNum...)` which takes a +function `f` and its `args` and returns the resulting `GaussNum` with an +uncertainty defined by the equation above. + +**Hint**: You can compute the gradient of a function with Zygote, for example: + +```@repl lab +using Zygote; + +f(x,y) = x*y; +Zygote.gradient(f, 2., 3.) +``` + +::: + +::: details Show solution + +```@example lab +function uncertain(f, args::GaussNum...) + μs = (x.μ for x in args) + dfs = Zygote.gradient(f, μs...) 
+ + σ² = mapreduce(+, zip(dfs,args)) do (df,x) + (df * x.σ)^2 + end + + GaussNum(f(μs...), sqrt(σ²)) +end +nothing # hide +``` + +::: + +Now you can propagate uncertainties through any function like this: + +```@repl lab +x1 = 2.0 ± 2.0 +x2 = 2.0 ± 2.0 +uncertain(*, x1, x2) +``` + +You can verify the correctness of your implementation by comparing to the manual +implementation from the lecture. + +::: warning Exercise + +For convenience, implement the macro `@register` which will define the +uncertainty propagation rule for a given function. E.g. for the function `*` +the macro should generate code like below + +```julia +Base.:*(args::GaussNum...) = uncertain(*, args...) +``` + +**Hint**: If you run into trouble with module names of functions you can make use of + +```@repl lab +getmodule(f) = first(methods(f)).module +getmodule(*) +``` + +::: + +::: details Show solution + +```@example lab +function _register(func::Symbol) + mod = getmodule(eval(func)) + :($(mod).$(func)(args::GaussNum...) = uncertain($func, args...)) +end + +function _register(funcs::Expr) + Expr(:block, map(_register, funcs.args)...) +end + +macro register(funcs) + _register(funcs) +end +nothing # hide +``` + +::: + +Lets register some arithmetic functions and see if they work + +```@repl lab +@register * +x1 * x2 +@register - + +x1 + x2 +x1 - x2 +``` + +To finalize the definition of our new `GaussNum` we can define conversion and +promotion rules such that we do not have to define things like + +```julia ++(x::GaussNum, y::Real) = ... ++(x::Real, y::GaussNum) = ... +``` + +::: warning Exercise + +Define `convert` and `promote_rule`s such that you can perform arithmetic operations +on `GaussNum`s and other `Real`s. + +**Hint**: When converting a normal number to a `GaussNum` you can set the standard deviation +to zero. 
+ +::: + +::: details Show solution + +```@example lab +Base.convert(::Type{T}, x::T) where T<:GaussNum = x +Base.convert(::Type{GaussNum{T}}, x::Real) where T = GaussNum(x,zero(T)) +Base.promote_rule(::Type{GaussNum{T}}, ::Type{S}) where {T,S} = GaussNum{T} +Base.promote_rule(::Type{GaussNum{T}}, ::Type{GaussNum{T}}) where T = GaussNum{T} +``` + +::: + +You can test if everything works by adding/multiplying floats to `GuassNum`s. + +```@repl lab +1.0±1.0 + 2.0 +[1.0±0.001, 2.0] +``` + +### Propagating Uncertainties through ODEs + +::: warning Exercise + +With our newly defined `GaussNum` we can easily propagate uncertainties through +our ODE solvers without changing a single line of their code. Try it! + +::: + +::: details Show solution + +```@example lab +θ = [0.1±0.001, 0.2, 0.3, 0.2] +u0 = [1.0±0.1, 1.0±0.1] +tspan = (0.,100.) +dt = 0.1 +prob = ODEProblem(lotkavolterra,tspan,u0,θ) + +t, X = solve(prob, Euler(0.1)) +``` + +::: + +::: warning Exercise + +Create a plot that takes a `Vector{<:GaussNum}` and plots the mean surrounded +by the uncertainty. + +::: + +::: details Show solution + +```@repl lab +mu(x::GaussNum) = x.μ +sig(x::GaussNum) = x.σ + +function uncertainplot(t, x::Vector{<:GaussNum}) + p = plot( + t, + mu.(x) .+ sig.(x), + xlabel = "x", + ylabel = "y", + fill = (mu.(x) .- sig.(x), :lightgray, 0.5), + linecolor = nothing, + primary = false, # no legend entry + ) + + # add the data to the plots + plot!(p, t, mu.(X[1,:])) + + return p +end +``` + +::: + +```@example lab +uncertainplot(t, X[1,:]) +``` + +Unfortunately, with this approach, we would have to define things like `uncertainplot!` +and `kwargs` to the function by hand. +To make plotting `GaussNum`s more pleasant we can make use of the `@recipe` +macro from `Plots.jl`. It allows to define plot recipes for custom types +(without having to depend on Plots.jl). Additionally, it makes it easiert to +support all the different ways of creating plots (e.g. 
via `plot` or `plot!`, +and with support for all keyword args) without having to overload tons of +functions manually. If you want to read more about plot recipies in the docs +of [`RecipesBase.jl`](http://juliaplots.org/RecipesBase.jl/stable/). +An example of a recipe for vectors of `GaussNum`s could look like this: + +```@example lab +@recipe function plot(ts::AbstractVector, xs::AbstractVector{<:GaussNum}) + # you can set a default value for an attribute with `-->` + # and force an argument with `:=` + μs = [x.μ for x in xs] + σs = [x.σ for x in xs] + @series begin + :seriestype := :path + # ignore series in legend and color cycling + primary := false + linecolor := nothing + fillcolor := :lightgray + fillalpha := 0.5 + fillrange := μs .- σs + # ensure no markers are shown for the error band + markershape := :none + # return series data + ts, μs .+ σs + end + ts, μs +end + +# now we can easily plot multiple things on to of each other +p1 = plot(t, X[1,:], label="x", lw=3) +plot!(p1, t, X[2,:], label="y", lw=3) +``` + +# References + +* [MIT18-330S12: Chapter 5](https://ocw.mit.edu/courses/mathematics/18-330-introduction-to-numerical-analysis-spring-2012/lecture-notes/MIT18_330S12_Chapter5.pdf) +* [RK2 derivation](https://web.mit.edu/10.001/Web/Course_Notes/Differential_Equations_Notes/node5.html) \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_12/lecture.md b/docs_vitepress/src/lectures/lecture_12/lecture.md new file mode 100644 index 00000000..dcdae933 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_12/lecture.md @@ -0,0 +1,373 @@ +# [Uncertainty Propagation in Ordinary Differential Equations](@id lec12) + +Differential equations are commonly used in science to describe many aspects of the physical world, ranging from dynamical systems and curves in space to complex multi-physics phenomena. 
+
+As an example, consider a simple non-linear ordinary differential equation:
+
+```math
+\begin{align}
+\dot{x}&=\alpha x-\beta xy,\\\dot{y}&=-\delta y+\gamma xy,
+\end{align}
+```
+
+which describes the behavior of a predator-prey model in continuous time:
+
+- x is the population of prey (sheep),
+- y is the population of predators (wolves)
+- derivatives represent instantaneous growth rates of the populations
+- ``t`` is the time and ``\alpha, \beta, \gamma, \delta`` are parameters.
+
+It can be written with vector arguments ``\mathbf{x}=[x,y]``:
+
+```math
+\frac{d\mathbf{x}}{dt}=f(\mathbf{x},\theta)
+```
+
+with an arbitrary function ``f`` with a vector of parameters ``\theta``.
+
+The first thing we may want to do with an ODE is to see its evolution in time. The simplest approach is to discretize the time axis into steps:
+``t = [t_1, t_2, t_3, \ldots t_T]``
+and evaluate the solution at these points.
+
+Replacing derivatives by differences:
+
+```math
+\dot x \leftarrow \frac{x_t-x_{t-1}}{\Delta t}
+```
+
+we can derive a general scheme (Euler solution):
+
+```math
+\mathbf{x}_t = \mathbf{x}_{t-1} + \Delta{}t f(\mathbf{x}_{t-1},\theta)
+```
+
+which can be written generically in Julia:
+
+```julia
+function f(x,θ)
+    α,β,γ,δ = θ
+    x1,x2=x
+    dx1 = α*x1 - β*x1*x2
+    dx2 = δ*x1*x2 - γ*x2
+    [dx1,dx2]
+end
+
+function solve(f,x0::AbstractVector,θ,dt,N)
+    X = hcat([zero(x0) for i=1:N]...)
+    X[:,1]=x0
+    for t=1:N-1
+        X[:,t+1]=X[:,t]+dt*f(X[:,t],θ)
+    end
+    X
+end
+```
+
+It is simple and works (with sufficiently small ``dt``):
+
+![](lotka.svg)
+
+An ODE of this kind is an example of a "complex" simulation code that we may want to use, interact with, modify or incorporate into a more complex scheme.
+ +- we will test how to re-define the elementary operations using custom types, automatic differentiation and automatic code generation +- we will redefine the plotting operation to display the new type correctly +- we will use composition to incorporate the ODE into a more complex solver + +## Uncertainty propagation + +Prediction of the ODE model is valid only if all parameters and all initial conditions are accurate. This is almost never the case. While the number of sheep can be known, the number of wolfes in a forest is more uncertain. The same model holds for predator-prey in insects where the number of individuals can be only estimated. + +Uncertain initial conditions: + +- number of predators and prey given by a probability distribution +- interval ``[0.8,1.2]`` corresponds to uniform distribution ``U(0.8,1.2)`` +- gaussian ``N(\mu,\sigma)``, with mean ``\mu`` and standard deviation ``\sigma`` e.g. ``N(1,0.1)`` +- more complicated distributions are more realistic (the number of animals is not negative!) + +### Ensemble approach + +The most simple approach is to represent distribution by an empirical density = discrete samples. + +```math +p(\mathbf{x})\approx \frac{1}{K}\sum_{k=1}^{K} \delta(\mathbf{x}-\mathbf{x}^{(k)}) +``` + +In the case of a Gaussian, we just sample: + +```julia +K = 10 +X0 = [x0 .+ 0.1*randn(2) for _=1:K] # samples of initial conditions +Xens=[X=solve(f,X0[i],θ0,dt,N) for i=1:K] # solve multiple times +``` + +(can be implemented more elegantly using multiple dispatch on Vector{Vector}) + +![](LV_ensemble.svg) + +While it is very simple and universal, it may become hard to interpret. + +- What is the probability that it will higher than ``x_{max}``? +- Improving accuracy with higher number of samples (expensive!) + +### Propagating a Gaussian + +Propagation of uncertainty has been studied in many areas of science. Relation between accuracy and computational speed is always a tradeoff. 
+
+A common approach to propagation of uncertainty is the linearized Gaussian:
+
+- variable ``x`` is represented by a Gaussian ``N(\mu,\sigma)``
+- transformation of addition: ``x+a\sim N(\mu+a,\sigma)``
+- transformation of multiplication: ``a*x\sim N(a*\mu,a*\sigma)``
+- general transformation approximated:
+
+```math
+g(x)\sim N(g(\mu),g'(\mu)*\sigma)
+```
+
+This can be efficiently implemented in Julia:
+
+```julia
+import Base: +, *
+
+struct GaussNum{T<:Real}
+    μ::T
+    σ::T
+end
+
++(x::GaussNum{T},a::T) where T =GaussNum(x.μ+a,x.σ)
++(a::T,x::GaussNum{T}) where T =GaussNum(x.μ+a,x.σ)
+*(x::GaussNum{T},a::T) where T =GaussNum(x.μ*a,a*x.σ)
+*(a::T,x::GaussNum{T}) where T =GaussNum(x.μ*a,a*x.σ)
+```
+
+For the ODE we need multiplication of two Gaussians. Using a Taylor expansion and neglecting covariances:
+
+```math
+g(x_1,x_2)=N\left(g(\mu_1,\mu_2), \sqrt{\left(\frac{dg}{dx_1}(\mu_1,\mu_2)\sigma_1\right)^2 + \left(\frac{dg}{dx_2}(\mu_1,\mu_2)\sigma_2\right)^2}\right)
+```
+
+which trivially applies to the sum: ``x_1+x_2=N(\mu_1+\mu_2, \sqrt{\sigma_1^2 + \sigma_2^2})``
+
+```julia
++(x1::GaussNum{T},x2::GaussNum{T}) where T =GaussNum(x1.μ+x2.μ,sqrt(x1.σ.^2 + x2.σ.^2))
+*(x1::GaussNum{T},x2::GaussNum{T}) where T =GaussNum(x1.μ*x2.μ, sqrt((x2.μ*x1.σ).^2 + (x1.μ*x2.σ).^2))
+```
+
+Following the principle of defining the necessary functions on the type, we can make it pass through the ODE:
+
+- it is necessary to define new initialization (function `zero`)
+- define a nice-looking constructor (``±``)
+
+    ```julia
+    ±(a::T,b::T) where T = GaussNum(a,b) # can be automated (macro, generated functions)
+    ```
+### Flexibility
+
+The great advantage of the former model was the ability to run arbitrary code with uncertainty attached to an arbitrary number.
+
+For example, we may know the initial conditions, but do not know the parameter value.
+```julia
+GX=solve(f,[1.0±0.1,1.0±0.1],[0.1±0.1,0.2,0.3,0.2],0.1,1000)
+```
+
+![](LV_GaussNum2.svg)
+
+### Disadvantage
+
+The result does not correspond to the ensemble version above.
+
+- we have ignored the covariances
+- extension to a version with covariances is possible by keeping track of the correlations (`Measurements.jl`), where other variables are stored in a dictionary:
+    - correlations found by language manipulations
+    - very flexible and easy-to-use
+    - discovering the covariances requires building the covariance from `ids`. (Expensive if done too often.)
+
+## Vector uncertainty
+
+The previous simple approach ignores the covariances between variables. Even if we track covariances linearly in the same fashion (``Measurements.jl``), the approach will suffer from a loss of precision under non-linearity.
+
+
+![](https://photos1.blogger.com/blogger/5955/293/1600/unscented-transform-explained.jpg)
+
+- The linearization-based approach propagates only the mean through the non-linearity and models its neighborhood by a plane.
+- Propagating all samples is too expensive
+- Methods based on quadrature or cubature rules are a compromise
+
+
+The cubature approach is based on moment matching:
+
+```math
+\mu_g = \int g(x) p(x) dx
+```
+
+for which ``g(\mu)`` is a poor approximation, corresponding to:
+
+```math
+\mu_g = g(\mu) = \int g(x) \delta(x-\mu) dx
+```
+
+For a Gaussian distribution, we can use a smarter integration rule, called the Gauss-Hermite quadrature:
+
+```math
+\mu_g = \int g(x) p(x) dx \approx \sum_{j=1}^J w_j g(x_j)
+```
+
+where ``x_j`` are prescribed quadrature points (see e.g. [online tables](https://www.efunda.com/math/num_integration/findgausshermite.cfm))
+
+In the multivariate setting, the same problem is typically solved with the aim to reduce the computational cost to linear complexity with dimension. Most often aiming at ``O(2d)`` complexity where ``d`` is the dimension of vector ``x``.
+ +One of the most popular approaches today is based on cubature rules approximating the Gaussian in radial-spherical coordinates. + +### Cubature rules + +Consider Gaussian distribution with mean ``\mu`` and covariance matrix ``\Sigma`` that is positive definite with square root ``\sqrt\Sigma``, such that ``\sqrt\Sigma \sqrt\Sigma^T=\Sigma``. The quadrature pints are: + +```math +x_i = \mu + \sqrt\Sigma q_i +``` + +```math +\begin{align} +q_{1}&=\sqrt{d}\begin{bmatrix}1\\ +0\\ +\vdots +\end{bmatrix} +& +q_{2}&=\sqrt{d}\begin{bmatrix}0\\ +1\\ +\vdots +\end{bmatrix} \ldots +& +q_{d+1}&=\sqrt{d}\begin{bmatrix}-1\\ +0\\ +\vdots +\end{bmatrix} +q_{d+2}&=\sqrt{d}\begin{bmatrix}0\\ +-1\\ +\vdots +\end{bmatrix} \ldots +\end{align} +``` + +that can be composed into a matrix ``Q=[q_1,\ldots q_{2d}]`` that is constant: + +```math +Q = \sqrt{d} [ I_d, -I_d] +``` + +![](cubature.png) + +Those quadrature points are in integration weighted by: + +```math +w_i = \frac{1}{2d}, i=1,\ldots,2d +``` + +where ``d`` is dimension of the vectors. + +The quadrature points are propogated through the non-linearity in parallel (``x_i'=g(x_i)``) and the resulting Gaussian distribution is: + +```math +\begin{align} +x' & \sim N(\mu',\Sigma')\\ +\mu' & = \frac{1}{2d}\sum_{j=1}^{2d} x'_i\\ +\Sigma &= \frac{1}{2d}\sum_{j=1}^{2d} (x'_i-\mu')^T (x'_i-\mu') +\end{align} +``` + +It is easy to check that if the sigma-points are propagated through an identity, they preserve the mean and variance. 
+```math
+\begin{align}
+\mu' & = \frac{1}{2d}\sum_{i=1}^{2d} (\mu + \sqrt{\Sigma}q_i)\\
+ & = \frac{1}{2d}\left(2d\mu + \sqrt{\Sigma} \sum_{i=1}^{2d} q_i\right)\\
+ & = \mu
+\end{align}
+```
+
+For our example:
+
+![](LV_Quadrics.svg)
+
+- only 4 trajectories propagated deterministically
+- cannot be implemented using a single number type
+    - the number of points to store is proportional to the dimension
+    - manipulation requires operations from linear algebra
+- moving to representations in vector form
+    - simple for initial conditions,
+    - how to extend to operate also on parameters?
+
+### Smarter implementation
+
+The easiest solution is to put the corresponding parts of the problem together:
+
+- the ODE function ``f``,
+- its state ``x0``,
+- and parameters ``θ``
+
+can be wrapped into an ODEProblem
+
+```julia
+struct ODEProblem{F,T,X<:AbstractVector,P<:AbstractVector}
+    f::F
+    tspan::T
+    x0::X
+    θ::P
+end
+```
+
+- the solver can operate on the ODEProblem type
+
+### Uncertainty propagation in vectors
+
+Example: consider uncertainty in state ``[x_1,x_2]`` and the first parameter ``\theta_1``.
+
+Quick and dirty:
+
+```julia
+getuncertainty(o::ODEProblem) = [o.u0[1:2];o.θ[1]]
+setuncertainty!(o::ODEProblem,x::AbstractVector) = o.u0[1:2]=x[1:2],o.θ[1]=x[3]
+```
+
+and write a general Cubature solver using multiple dispatch.
+
+Practical issues:
+
+- how to check bounds? (Asserts)
+- what if we provide an incompatible ODEProblem
+- define a type that specifies the type of uncertainty?
+
+```julia
+struct GaussODEProblem
+    mean::ODEProblem
+    unc_in_u # any indexing type accepted by to_index()
+    unc_in_θ
+    sqΣ0
+end
+```
+ +```julia +getmean(gop::GaussODEProblem) =[ gop.mean.x0[gop.unc_in_u];gop.mean.θ[gop.unc_in_θ]] +setmean!(gop::GaussODEProblem,x::AbstractVector) = begin + gop.mean.x0[gop.unc_in_u]=x[1:length(gop.unc_in_u)] + gop.mean.θ[gop.unc_in_θ]=x[length(gop.unc_in_u).+[1:length(gop.unc_in_θ)]] +end +``` + +Constructor accepts an ODEProblem with uncertain numbers and converts it to GaussODEProblem: + +- goes through ODEProblem ``x0`` and ``θ`` fields and checks their types +- replaces GaussNums in ODEProblem by ordinary numbers +- remembers indices of GaussNum in ``x0`` and ``θ`` +- copies standard deviations in GaussNum to ``sqΣ0`` \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_12/lotka.svg b/docs_vitepress/src/lectures/lecture_12/lotka.svg new file mode 100644 index 00000000..0481fb3d --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_12/lotka.svg @@ -0,0 +1,308 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs_vitepress/src/lectures/lecture_12/lotkadata.jl b/docs_vitepress/src/lectures/lecture_12/lotkadata.jl new file mode 100644 index 00000000..1b22a945 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_12/lotkadata.jl @@ -0,0 +1,25 @@ +using OrdinaryDiffEq +using JLD2 +#using Plots + +function f(x,θ,t) + α,β,γ,δ = θ + x1,x2=x + dx1 = α*x1 - β*x1*x2 + dx2 = δ*x1*x2 - γ*x2 + [dx1,dx2] +end + +θ0 = [0.1,0.2,0.3,0.2] +x0 = [1.0,1.0] +tspan = (0., 100.) 
+prob = ODEProblem(f,x0,tspan,θ0) +sol = solve(prob,Tsit5()) +#p = plot(sol) + +dt = 0.2 +ts = (tspan[1]):dt:(tspan[2]) +us = reduce(hcat, sol(ts).u) + +data = Dict(:u=>us, :t=>collect(ts)) +jldsave("lotkadata.jld2", u=us, t=collect(ts)) diff --git a/docs_vitepress/src/lectures/lecture_12/lotkadata.jld2 b/docs_vitepress/src/lectures/lecture_12/lotkadata.jld2 new file mode 100644 index 00000000..d6ac297a Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_12/lotkadata.jld2 differ diff --git a/docs_vitepress/src/lectures/lecture_12/ode.jl b/docs_vitepress/src/lectures/lecture_12/ode.jl new file mode 100644 index 00000000..7ca0cf53 --- /dev/null +++ b/docs_vitepress/src/lectures/lecture_12/ode.jl @@ -0,0 +1,151 @@ + +function f(x,θ) + α,β,γ,δ = θ + x1,x2=x + dx1 = α*x1 - β*x1*x2 + dx2 = δ*x1*x2 - γ*x2 + [dx1,dx2] +end + +function solve(f,x0::AbstractVector,θ,dt,N) + X = hcat([zero(x0) for i=1:N]...) + X[:,1]=x0 + for t=1:N-1 + X[:,t+1]=X[:,t]+dt*f(X[:,t],θ) + end + X +end + +θ0 = [0.1,0.2,0.3,0.2] +x0 = [1.0,1.0] +dt = 0.1 +N = 1000 +X=solve(f,x0,θ0,dt,N) + + +using Plots + +p=plot(X[1,:],xlabel="t",label="x",color=:blue) +p=plot!(X[2,:],xlabel="t",label="y",color=:red) + + +K=100 +X0 = [x0 .+ 0.1*randn(2) for k=1:K] +Xens=[X=solve(f,X0[i],θ0,dt,N) for i=1:K] + +for i=1:K + p=plot!(Xens[i][1,:],label="",color=:blue) + p=plot!(Xens[i][2,:],label="",color=:red) +end + +savefig("LV_ensemble.svg") + +using StatsBase +Xm=mean(Xens) +Xstd = std(Xens) + +struct GaussNum{T<:Real} + μ::T + σ::T +end +import Base: +, -, *, zero ++(x::GaussNum{T},a::T) where T =GaussNum(x.μ+a,x.σ) ++(a::T,x::GaussNum{T}) where T =GaussNum(x.μ+a,x.σ) +-(x::GaussNum{T},a::T) where T =GaussNum(x.μ-a,x.σ) +-(a::T,x::GaussNum{T}) where T =GaussNum(x.μ-a,x.σ) +*(x::GaussNum{T},a::T) where T =GaussNum(x.μ*a,a*x.σ) +*(a::T,x::GaussNum{T}) where T =GaussNum(x.μ*a,a*x.σ) + +# TODO +# sin(x::GaussNum)= @uncertain sin(x) + + ++(x1::GaussNum{T},x2::GaussNum{T}) where T =GaussNum(x1.μ+x2.μ, 
sqrt(x1.σ.^2 + x2.σ.^2)) +-(x1::GaussNum{T},x2::GaussNum{T}) where T =GaussNum(x1.μ-x2.μ, sqrt(x1.σ.^2 + x2.σ.^2)) +*(x1::GaussNum{T},x2::GaussNum{T}) where T =GaussNum(x1.μ*x2.μ, sqrt((x2.μ*x1.σ).^2 + (x1.μ * x2.σ).^2)) +zero(::Type{GaussNum{T}}) where T =GaussNum(zero(T),zero(T)) +zero(x::AbstractVector{T}) where T =[zero(T) for i=1:length(x)] + +function MV(x::AbstractArray{GaussNum{T}}) where T + M=similar(x,T) + V=similar(x,T) + for i=1:length(x) + M[i]=x[i].μ + V[i]=x[i].σ + end + (M,V) +end + +GX=solve(f,[GaussNum(1.0,0.1),GaussNum(1.0,0.1)],[0.1,0.2,0.3,0.2],0.1,1000) +M,V=MV(GX) +plot(M') +plot(M[1,1:30:end],errorbar=V[1,1:30:end],label="x",color=:blue) +plot!(M[2,1:30:end],errorbar=V[2,1:30:end],label="y",color=:red) + +savefig("LV_GaussNum.svg") + +GX=solve(f,[GaussNum(1.0,0.1),GaussNum(1.0,0.1)],[GaussNum(0.1,0.1),0.2,0.3,0.2],0.1,1000) +M,V=MV(GX) +plot(M') +plot(M[1,1:30:end],errorbar=V[1,1:30:end],label="x",color=:blue) +plot!(M[2,1:30:end],errorbar=V[2,1:30:end],label="y",color=:red) + +savefig("LV_GaussNum2.svg") + +using Measurements +MX=solve(f,[1.0±0.1,1.0±0.1],[0.1,0.2,0.3,0.2],0.1,1000) +plot(MX[1,1:30:end],label="x",color=:blue) +plot!(MX[2,1:30:end],label="y",color=:red) + +savefig("LV_Measurements.svg") + +MX=solve(f,[1.0±0.1,1.0±0.1],[0.1±0.01,0.2±0.01,0.3±0.01,0.2±0.01],0.1,1000) +plot(MX[1,1:30:end],label="x",color=:blue) +plot!(MX[2,1:30:end],label="y",color=:red) + +savefig("LV_Measurements2.svg") + + + +# Plot receipe +# plot(Vector{GaussNum}) + +using LinearAlgebra +function solve(f,x0::AbstractVector,sqΣ0, θ,dt,N,Nr) + n = length(x0) + n2 = 2*length(x0) + Qp = sqrt(n)*[I(n) -I(n)] + + X = hcat([zero(x0) for i=1:N]...) + S = hcat([zero(x0) for i=1:N]...) 
+ X[:,1]=x0 + Xp = x0 .+ sqΣ0*Qp + sqΣ = sqΣ0 + Σ = sqΣ* sqΣ' + S[:,1]= diag(Σ) + for t=1:N-1 + if rem(t,Nr)==0 + Xp .= X[:,t] .+ sqΣ * Qp + end + for i=1:n2 # all quadrature points + Xp[:,i].=Xp[:,i] + dt*f(Xp[:,i],θ) + end + mXp=mean(Xp,dims=2) + X[:,t+1]=mXp + Σ=Matrix((Xp.-mXp)*(Xp.-mXp)'/n2) + S[:,t+1]=sqrt.(diag(Σ)) + # @show Σ + + sqΣ = cholesky(Σ).L + + end + X,S +end + +## Extension to arbitrary + +QX,QS=solve(f,[1.0,1.0],(0.1)*I(2),θ0,0.1,1000,1) +plot(QX[1,1:30:end],label="x",color=:blue,errorbar=QS[1,1:30:end]) +plot!(QX[2,1:30:end],label="y",color=:red,errorbar=QS[2,1:30:end]) + +savefig("LV_Quadrics.svg") \ No newline at end of file diff --git a/docs_vitepress/src/lectures/lecture_12/rk2.png b/docs_vitepress/src/lectures/lecture_12/rk2.png new file mode 100644 index 00000000..dc54463a Binary files /dev/null and b/docs_vitepress/src/lectures/lecture_12/rk2.png differ diff --git a/docs_vitepress/src/lectures/outline.md b/docs_vitepress/src/lectures/outline.md new file mode 100644 index 00000000..1d20fe13 --- /dev/null +++ b/docs_vitepress/src/lectures/outline.md @@ -0,0 +1,63 @@ +# Course outline + +## 1. Introduction + +## 2. Type system + +- user: tool for abstraction +- compiler: tool for memory layout + +## 3. Design patterns (mental setup) + +- Julia is a type-based language +- multiple-dispatch generalizes OOP and FP + +## 4. Packages + +- way how to organize code +- code reuse (alternative to libraries) +- experiment reproducibility + +## 5. Benchmarking + +- how to measure code efficiency + +## 6. Introspection + +- understand how the compiler process the data + +## 7. Macros + +- automate writing of boring the boilerplate code +- good macro create cleaner code + +## 8. Automatic Differentiation + +- Theory: difference between the forward and backward mode +- Implementation techniques + +## 9. Intermediate representation + +- how to use internal the representation of the code +- example in automatic differentiation + +## 10. 
Parallel computing + +- threads, processes + +## 11. Graphics card coding + +- types for GPU +- specifics of architectures + +## 12. Ordinary Differential Equations + +- simple solvers +- error propagation + +## 13. Data driven ODE + +- combine ODE with optimization +- automatic differentiation (adjoints) + + diff --git a/docs_vitepress/src/projects/projects.md b/docs_vitepress/src/projects/projects.md new file mode 100644 index 00000000..260b9c8c --- /dev/null +++ b/docs_vitepress/src/projects/projects.md @@ -0,0 +1,106 @@ +# Potential projects + +Below, we list some potential projects for inspiration. + +## Implementing new things + +### Lenia (Continuous Game of Life) + +[Lenia](https://chakazul.github.io/lenia.html#Code) is a continuous version of Conway's Game of +Life. Implement a Julia version. For example, you could focus either on performance compared to the +python version, or build nice visualizations with [Makie.jl](https://docs.makie.org/stable/). + +Nice tutorial [from Conway to Lenia](https://colab.research.google.com/github/OpenLenia/Lenia-Tutorial/blob/main/Tutorial_From_Conway_to_Lenia.ipynb) + +### The Equation Learner And Its Symbolic Representation + +In many scientific and engineering one searches for interpretable (i.e. +human-understandable) models instead of the black-box function approximators +that neural networks provide. +The [*equation learner*](http://proceedings.mlr.press/v80/sahoo18a.html) (EQL) +is one approach that can identify concise equations that describe a given +dataset. + +The EQL is essentially a neural network with different unary or binary +activation functions at each individual unit. The network weights are +regularized during training to obtain a sparse model which hopefully results in +a model that represents a simple equation. + +The goal of this project is to implement the EQL, and if there is enough time +the [*improved equation learner*](https://arxiv.org/abs/2105.06331) (iEQL). 
+The equation learners should be tested on a few toy problems (possibly inspired +by the tasks in the papers). Finally, you will implement functionality that +can transform the learned model into a symbolic, human readable, and executable +Julia expression. + +### Architecture visualizer + +Create an extension of Flux / Lux and to visualize architecture of a neural network suitable for publication. Something akin [PlotNeuralNet](https://github.com/HarisIqbal88/PlotNeuralNet). + +### Learning Large Language Models with reduced precition (Mentor: Tomas Pevny) + +Large Language Models ((Chat) GPT, LLama, Falcon, Palm, ...) are huge. A recent trend is to perform optimization in reduced precision, for example in `int8` instead of `Float32`. Such feature is currently missing in Julia ecosystem and this project should be about bringing this to the community (for an introduction, read these blogs [*LLM-int8 and emergent features*](https://timdettmers.com/2022/08/17/llm-int8-and-emergent-features/), [*A gentle introduction to 8-bit Matrix Multiplication*](https://huggingface.co/blog/hf-bitsandbytes-integration)). The goal would be to implement this as an additional type of Number / Matrix and overload multiplication on CPU (and ideally on GPU) to make it transparent for neural networks? **What I will learn?** In this project, you will learn a lot about the (simplicity of) implementation of deep learning libraries and you will practice abstraction of Julia's types. You can furthermore learn about GPU Kernel programming and `Transformers.jl` library. + +### Planning algorithms (Mentor: Tomas Pevny) + +Extend [SymbolicPlanners.jl](https://github.com/JuliaPlanners/SymbolicPlanners.jl) with the mm-ϵ variant of the bi-directional search [MM: A bidirectional search algorithm that is guaranteed to meet in the middle](https://www.sciencedirect.com/science/article/pii/S0004370217300905). 
This [pull request](https://github.com/JuliaPlanners/SymbolicPlanners.jl/pull/8) might be very helpful in understanding better the library. + +### A Rule Learning Algorithms (Mentor: Tomas Pevny) + +[Rule-based models](https://christophm.github.io/interpretable-ml-book/rules.html) +are simple and very interpretable models that have been around for a long time +and are gaining popularity again. +The goal of this project is to implement one of these algorithms +* [sequential covering](https://christophm.github.io/interpretable-ml-book/rules.html#sequential-covering) +algorithm called [`RIPPER`](http://www.cs.utsa.edu/~bylander/cs6243/cohen95ripper.pdf) +and evaluate it on a number of datasets. +* [Learning Certifiably Optimal Rule Lists for Categorical Data](https://arxiv.org/abs/1704.01701) +* [Boolean decision rules via column generation](https://proceedings.neurips.cc/paper/2018/file/743394beff4b1282ba735e5e3723ed74-Paper.pdf) +* [Learning Optimal Decision Trees with SAT](https://proceedings.neurips.cc/paper/2021/file/4e246a381baf2ce038b3b0f82c7d6fb4-Paper.pdf) +* [A SAT-based approach to learn explainable decision sets](https://link.springer.com/content/pdf/10.1007/978-3-319-94205-6_41.pdf) +To increase the impact of the project, consider interfacing it with [MLJ.jl](https://alan-turing-institute.github.io/MLJ.jl/dev/) + +### Parallel optimization (Mentor: Tomas Pevny) + +Implement one of the following algorithms to train neural networks in parallel. Can be implemented in a separate package or consider extending [FluxDistributed.jl](https://github.com/DhairyaLGandhi/FluxDistributed.jl). Do not forget to verify that the method actually works!!! 
+* [Hogwild!](https://proceedings.neurips.cc/paper/2011/file/218a0aefd1d1a4be65601cc6ddc1520e-Paper.pdf) +* [Local sgd with periodic averaging: Tighter analysis and adaptive synchronization](https://proceedings.neurips.cc/paper/2019/file/c17028c9b6e0c5deaad29665d582284a-Paper.pdf) +* [Distributed optimization for deep learning with gossip exchange](https://arxiv.org/abs/1804.01852) + +## Solve issues in existing projects: + +### Address issues in markov decision processes (Mentor: Jan Mrkos) + +Fix type stability issue in [MCTS.jl](https://github.com/JuliaPOMDP/MCTS.jl), prepare benchmarks, +and evaluate the impact of the changes. Details can be found in [this +issue](https://github.com/JuliaPOMDP/MCTS.jl/issues/59). This project will require learnind a little +bit about Markov Decision Processes if you don't know them already. + +If it sounds interesting, get in touch with lecturer/lab assistant, who will connect you with Jan Mrkos. + +### Extend HMil library with Retentative networks (Mentor: Tomas Pevny) + +[Retentative networks](https://arxiv.org/abs/2307.08621) were recently proposed as a low-cost alternative to Transformer models without sacrificing performance (according to authors). By implementing Retentative Networks, te HMil library will be able to learn sequences (not just sets), which might nicely extend its applicability. + +### Address issues in HMil/JsonGrinder library (Mentor: Simon Mandlik) + +These are open source toolboxes that are used internally in Avast. Lots of general functionality is done, but some love is needed in polishing. + +- refactor the codebase using package extensions (e.g. 
for FillArrays) +- improve compilation time (tracking down bottlenecks with SnoopCompile and using precompile directives from PrecompileTools.jl) + +Or study new metric learning approach on application in animation description + +- apply machine learning on slides within presentation provide by PowToon + +If it sounds interesting, get in touch with lecturer/lab assistant, who will connect you with Simon Mandlik. + +## [Former projects for your inspiration](@id former_projects) + +The following is a list of great projects of past years. + +- [NeuralCollaborativeFiltering.jl](https://github.com/poludmik/NeuralCollaborativeFiltering.jl) +- [OptimalTrainControl.jl](https://github.com/vtfanta/OptimalTrainControl_v2.jl) +- [Urban Traffic Control](https://github.com/Matyxus/UTC_jl) +- [Directed Evolution in Silico](https://github.com/soldamatlab/DESilico.jl) +- [ImageInspector.jl](https://github.com/JuliaTeachingCTU/ImageInspector.jl) (used in our bachelor course) \ No newline at end of file diff --git a/docs_vitepress/src/projects/requirements.md b/docs_vitepress/src/projects/requirements.md new file mode 100644 index 00000000..e790760a --- /dev/null +++ b/docs_vitepress/src/projects/requirements.md @@ -0,0 +1,68 @@ +# [Requirements](@id projects) + +The goal of the project should be to create something, which is actually useful. Therefore we offer a lot of freedom in how the project will look like with the condition that you should spent around 60 hours on it (this number was derived as follows: each credit is worth 30 hours minus 13 lectures + labs minus 10 homeworks 2 hours each) and you should demonstrate some skills in solving the project. 
In general, we can distinguish three types of project depending on the beneficiary: + + - **You benefit:** Use / try to solve a well known problem using Julia language, + - **Our group:** work with your tutors on a topic researched in the AIC group, + - **Julia community:** choose an issue in a registered Julia project you like and fix it (documentation issues are possible but the resulting documentation should be very nice.). + +The project should be of sufficient complexity that verify your skill of the language (to be agreed individually). + +## Project requirements + +The goal of the semestral project is to create a Julia pkg with **reusable**, +**properly tested** and **documented** code. We have given you some options of topics, +as well as the freedom to choose something that could be useful for your +research or other subjects. In general we are looking for something where +performance may be crucial such as data processing, optimization or equation +solving. + +In practice the project should roughly follow the structure below: + +```julia +. +├── scripts +│ ├── run_example.jl # one or more examples showing the capabilities of the pkg +│ ├── Project.toml # YOUR_PROJECT should be added here with develop command with rel path +│ └── Manifest.toml # should be committed as it allows to reconstruct the environment exactly +├── src +│ ├── YOUR_PROJECT.jl # ideally only some top level code such as imports and exports, rest of the code included from other files +│ ├── src1.jl # source files structured in some logical chunks +│ └── src2.jl +├── test +│ ├── runtest.jl # contains either all the tests or just includes them from other files +│ ├── Project.toml # lists some additional test dependencies +│ └── Manifest.toml # usually not committed to git as it is generated on the fly +├── docs +│ ├── Project.toml +│ ├── make.jl +│ └── src +│ └── index.md +├── README.md # describes in short what the pkg does and how to install pkg (e.g. 
some external deps) and run the example +├── Project.toml # lists all the pkg dependencies +└── Manifest.toml # usually not committed to git as the requirements may be to restrictive +``` + +Make sure that + +- `README.md` is present and contains general information about the package. A small example is a nice to have. +- The package can be installed trough the package manager as `Pkg.add("url of + the package")` with all and correct dependencies. Do not register the package + into an official registry if you are not willing to continue its development and + maintainance. +- Make sure that the package is covered by tests which are in the `test` folder. We + will try to run them. There is no need for 100% percent test coverage. Tests + testing the functionality are sufficient. +- The package should have basic documentation. For small packages, it is + sufficient to have documentation in readme. For larger pacakges, proper + documentation with `Documenter.jl` is advised. + +Only after all this we may look at the extent of the project and it's +difficulty, which may help us in deciding between grades. + +Nice to have things, which are not strictly required but obviously improves the score. + +- Ideally the project should be hosted on GitHub, which could have the continuous integration/testing set up. +- Include some benchmark and profiling code in your examples, which can show us how well you have dealt with the question of performance. +- Some parallelization attempts either by multi-processing, multi-threading, or CUDA. Do not forget to show the improvement. +- Documentation with a webpage using Documenter.jl. 
diff --git a/docs_vitepress/src/tutorials/installation.md b/docs_vitepress/src/tutorials/installation.md new file mode 100644 index 00000000..42e6363d --- /dev/null +++ b/docs_vitepress/src/tutorials/installation.md @@ -0,0 +1,62 @@ +# [Installation](@id install) + +In order to participate in the course, everyone should install a recent version of Julia together +with some text editor of choice. Furthermore during the course we will introduce some best practices +of creating/testing and distributing your own Julia code, for which we will require a GitHub +account. + +We recommend to install Julia via [`juliaup`](https://github.com/JuliaLang/juliaup). We are using +the latest, *stable* version of Julia (which at the time of this writing is `v1.9`). Once you have +installed `juliaup` you can get any Julia version you want via: + +```bash +$ juliaup add $JULIA_VERSION + +# or more concretely: +$ juliaup add 1.9 + +# but please, just use the latest, stable version +``` + +Now you should be able to start Julia an be greeted with the following: + +```bash +$ julia + _ + _ _ _(_)_ | Documentation: https://docs.julialang.org + (_) | (_) (_) | + _ _ _| |_ __ _ | Type "?" for help, "]?" for Pkg help. + | | | | | | |/ _` | | + | | |_| | | | (_| | | Version 1.9.2 (2023-07-05) + _/ |\__'_|_|_|\__'_| | Official https://julialang.org/ release +|__/ | + +julia> +``` + + +## Julia IDE + +There is no one way to install/develop and run Julia, which may be strange users coming from MATLAB, +but for users of general purpose languages such as Python, C++ this is quite common. Most of the +Julia programmers to date are using + +- [Visual Studio Code](https://code.visualstudio.com/), +- and the corresponding [Julia extension](https://www.julia-vscode.org/). 
+ +This setup is described in a comprehensive [step-by-step +guide](https://juliateachingctu.github.io/Julia-for-Optimization-and-Learning/stable/installation/vscode/) +in our bachelor course [*Julia for Optimization & +Learning*](https://juliateachingctu.github.io/Julia-for-Optimization-and-Learning/stable/). + +Note that this setup is not a strict requirement for the lectures/labs and any other text editor +with the option to send code to the terminal such as Vim (+Tmux), Emacs, or Sublime Text will +suffice. + +## GitHub registration & Git setup + +As one of the goals of the course is writing code that can be distributed to others, we recommend a +GitHub account, which you can create [here](https://github.com/) (unless you already have one). In +order to interact with GitHub repositories, we will be using `git`. For installation +instruction (Windows only) see the section in the bachelor +[course](https://juliateachingctu.github.io/Julia-for-Optimization-and-Learning/dev/installation/git/).