Skip to content

Commit bf4216a

Browse files
bors[bot] and vchuravy authored
Merge #116
116: Shared memory transpose r=vchuravy a=vchuravy cc: @leios Co-authored-by: Valentin Churavy <v.churavy@gmail.com>
2 parents a14b50b + d4aa159 commit bf4216a

File tree

3 files changed

+388
-50
lines changed

3 files changed

+388
-50
lines changed

docs/src/examples/performance.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
Run under `nsight-cu`:
44

55
```sh
6-
nv-nsight-cu-cli --nvtx --profile-from-start=off --section=SpeedOfLight julia --project=examples examples/performance.jl
6+
nv-nsight-cu-cli --nvtx --profile-from-start=off --section=SpeedOfLight julia --project=examples examples/performance.jl
77
```
88

99
## Results:

examples/Manifest.toml

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
# This file is machine-generated - editing it directly is not advised
2+
3+
[[AbstractFFTs]]
4+
deps = ["LinearAlgebra"]
5+
git-tree-sha1 = "051c95d6836228d120f5f4b984dd5aba1624f716"
6+
uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
7+
version = "0.5.0"
8+
9+
[[Adapt]]
10+
deps = ["LinearAlgebra"]
11+
git-tree-sha1 = "c88cfc7f9c1f9f8633cddf0b56e86302b70f64c5"
12+
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
13+
version = "1.0.1"
14+
15+
[[Base64]]
16+
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
17+
18+
[[BinaryProvider]]
19+
deps = ["Libdl", "SHA"]
20+
git-tree-sha1 = "5b08ed6036d9d3f0ee6369410b830f8873d4024c"
21+
uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232"
22+
version = "0.5.8"
23+
24+
[[CEnum]]
25+
git-tree-sha1 = "62847acab40e6855a9b5905ccb99c2b5cf6b3ebb"
26+
uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
27+
version = "0.2.0"
28+
29+
[[CUDAapi]]
30+
deps = ["Libdl", "Logging"]
31+
git-tree-sha1 = "d7ceadd8f821177d05b897c0517e94633db535fe"
32+
uuid = "3895d2a7-ec45-59b8-82bb-cfc6a382f9b3"
33+
version = "3.1.0"
34+
35+
[[CUDAdrv]]
36+
deps = ["CEnum", "CUDAapi", "Printf"]
37+
git-tree-sha1 = "01e90fa34e25776bc7c8661183d4519149ebfe59"
38+
uuid = "c5f51814-7f29-56b8-a69c-e4d8f6be1fde"
39+
version = "6.0.0"
40+
41+
[[CUDAnative]]
42+
deps = ["Adapt", "CEnum", "CUDAapi", "CUDAdrv", "DataStructures", "InteractiveUtils", "LLVM", "Libdl", "Printf", "TimerOutputs"]
43+
git-tree-sha1 = "f86269ff60ebe082a2806ecbce51f3cadc68afe9"
44+
uuid = "be33ccc6-a3ff-5ff2-a52e-74243cff1e17"
45+
version = "2.10.2"
46+
47+
[[Cassette]]
48+
git-tree-sha1 = "f6a148cadd38ba328bd2c03442037ef801a6aa05"
49+
uuid = "7057c7e9-c182-5462-911a-8362d720325c"
50+
version = "0.3.1"
51+
52+
[[CuArrays]]
53+
deps = ["AbstractFFTs", "Adapt", "CEnum", "CUDAapi", "CUDAdrv", "CUDAnative", "DataStructures", "GPUArrays", "Libdl", "LinearAlgebra", "MacroTools", "NNlib", "Printf", "Random", "Requires", "SparseArrays", "TimerOutputs"]
54+
git-tree-sha1 = "7c20c5a45bb245cf248f454d26966ea70255b271"
55+
uuid = "3a865a2d-5b23-5a0f-bc46-62713ec82fae"
56+
version = "1.7.2"
57+
58+
[[DataStructures]]
59+
deps = ["InteractiveUtils", "OrderedCollections"]
60+
git-tree-sha1 = "5a431d46abf2ef2a4d5d00bd0ae61f651cf854c8"
61+
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
62+
version = "0.17.10"
63+
64+
[[Dates]]
65+
deps = ["Printf"]
66+
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
67+
68+
[[Distributed]]
69+
deps = ["Random", "Serialization", "Sockets"]
70+
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
71+
72+
[[DocStringExtensions]]
73+
deps = ["LibGit2", "Markdown", "Pkg", "Test"]
74+
git-tree-sha1 = "88bb0edb352b16608036faadcc071adda068582a"
75+
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
76+
version = "0.8.1"
77+
78+
[[GPUArrays]]
79+
deps = ["AbstractFFTs", "Adapt", "LinearAlgebra", "Printf", "Random", "Serialization"]
80+
git-tree-sha1 = "e756da6cee76a5f1436a05827fa8fdf3badc577f"
81+
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
82+
version = "2.0.1"
83+
84+
[[InteractiveUtils]]
85+
deps = ["Markdown"]
86+
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
87+
88+
[[KernelAbstractions]]
89+
deps = ["Adapt", "CUDAapi", "CUDAdrv", "CUDAnative", "Cassette", "MacroTools", "Requires", "StaticArrays"]
90+
path = ".."
91+
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
92+
version = "0.1.5"
93+
94+
[[LLVM]]
95+
deps = ["CEnum", "Libdl", "Printf", "Unicode"]
96+
git-tree-sha1 = "b6b86801ae2f2682e0a4889315dc76b68db2de71"
97+
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
98+
version = "1.3.4"
99+
100+
[[LibGit2]]
101+
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
102+
103+
[[Libdl]]
104+
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
105+
106+
[[LinearAlgebra]]
107+
deps = ["Libdl"]
108+
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
109+
110+
[[Logging]]
111+
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
112+
113+
[[MPI]]
114+
deps = ["Distributed", "DocStringExtensions", "Libdl", "Random", "Requires", "Serialization", "Sockets"]
115+
git-tree-sha1 = "a01349600200a1cb2157fd91e6d19e38642273ed"
116+
uuid = "da04e1cc-30fd-572f-bb4f-1f8673147195"
117+
version = "0.12.0"
118+
119+
[[MacroTools]]
120+
deps = ["DataStructures", "Markdown", "Random"]
121+
git-tree-sha1 = "07ee65e03e28ca88bc9a338a3726ae0c3efaa94b"
122+
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
123+
version = "0.5.4"
124+
125+
[[Markdown]]
126+
deps = ["Base64"]
127+
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
128+
129+
[[NNlib]]
130+
deps = ["BinaryProvider", "Libdl", "LinearAlgebra", "Requires", "Statistics"]
131+
git-tree-sha1 = "d9f196d911f55aeaff11b11f681b135980783824"
132+
uuid = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
133+
version = "0.6.6"
134+
135+
[[OrderedCollections]]
136+
deps = ["Random", "Serialization", "Test"]
137+
git-tree-sha1 = "c4c13474d23c60d20a67b217f1d7f22a40edf8f1"
138+
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
139+
version = "1.1.0"
140+
141+
[[Pkg]]
142+
deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Test", "UUIDs"]
143+
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
144+
145+
[[Printf]]
146+
deps = ["Unicode"]
147+
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
148+
149+
[[REPL]]
150+
deps = ["InteractiveUtils", "Markdown", "Sockets"]
151+
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
152+
153+
[[Random]]
154+
deps = ["Serialization"]
155+
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
156+
157+
[[Requires]]
158+
deps = ["UUIDs"]
159+
git-tree-sha1 = "d37400976e98018ee840e0ca4f9d20baa231dc6b"
160+
uuid = "ae029012-a4dd-5104-9daa-d747884805df"
161+
version = "1.0.1"
162+
163+
[[SHA]]
164+
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
165+
166+
[[Serialization]]
167+
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
168+
169+
[[Sockets]]
170+
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
171+
172+
[[SparseArrays]]
173+
deps = ["LinearAlgebra", "Random"]
174+
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
175+
176+
[[StaticArrays]]
177+
deps = ["LinearAlgebra", "Random", "Statistics"]
178+
git-tree-sha1 = "5a3bcb6233adabde68ebc97be66e95dcb787424c"
179+
uuid = "90137ffa-7385-5640-81b9-e52037218182"
180+
version = "0.12.1"
181+
182+
[[Statistics]]
183+
deps = ["LinearAlgebra", "SparseArrays"]
184+
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
185+
186+
[[Test]]
187+
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
188+
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
189+
190+
[[TimerOutputs]]
191+
deps = ["Printf"]
192+
git-tree-sha1 = "311765af81bbb48d7bad01fb016d9c328c6ede03"
193+
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
194+
version = "0.5.3"
195+
196+
[[UUIDs]]
197+
deps = ["Random", "SHA"]
198+
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
199+
200+
[[Unicode]]
201+
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"

0 commit comments

Comments
 (0)