Skip to content

Commit eaa3d61

Browse files
committed
Optimize expand to avoid one unnecessary sdiv
1 parent 419481c commit eaa3d61

File tree

1 file changed

+14
-2
lines changed

1 file changed

+14
-2
lines changed

src/nditeration.jl

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,20 @@ Base.length(range::NDRange) = length(blocks(range))
8080
CartesianIndex(nI)
8181
end
8282

83-
Base.@propagate_inbounds function expand(ndrange::NDRange, groupidx::Integer, idx::Integer)
84-
expand(ndrange, blocks(ndrange)[groupidx], workitems(ndrange)[idx])
83+
# Map a linear workgroup index `groupidx` and a linear workitem index `idx`
# (both 1-based) to the corresponding global `CartesianIndex` of `ndrange`.
#
# Each linear index is first converted to its Cartesian form using the
# matching index space (`blocks(ndrange)` for the group, `workitems(ndrange)`
# for the item); the pair is then combined by the `CartesianIndex` method of
# `expand`.
#
# NOTE: a previous attempt to save one `sdiv` indexed a single fused
# `CartesianIndices` with `(groupidx - 1) * prod(size(B)) + idx`. That is not
# equivalent to the two-step conversion:
#   * the per-group stride must be the number of workitems in a group, not
#     the number of blocks (`prod(size(B))`), and
#   * for N > 1 a linear index over the fused extent runs through the whole
#     first dimension before advancing the second, whereas workitems are laid
#     out per block. E.g. with 2x2 blocks of 2x2 workitems, group 1 / item 3
#     is workitem (1, 2) of block (1, 1), i.e. global (1, 2); the fused linear
#     index yields (3, 1) instead.
# Correctness takes priority over the saved division, so the two-step
# conversion is kept.
Base.@propagate_inbounds function expand(ndrange::NDRange{N}, groupidx::Integer, idx::Integer) where {N}
    B = blocks(ndrange)
    W = workitems(ndrange)
    # Linear -> Cartesian for the group and the item independently, then let
    # the Cartesian method combine them.
    return expand(ndrange, B[groupidx], W[idx])
end
8698

8799
Base.@propagate_inbounds function expand(ndrange::NDRange{N}, groupidx::CartesianIndex{N}, idx::Integer) where {N}

0 commit comments

Comments
 (0)