Skip to content

Commit ad4c312

Browse files
committed
Avoid the exception branch in expand
1 parent f0ea5b2 commit ad4c312

File tree

1 file changed

+33
-1
lines changed

1 file changed

+33
-1
lines changed

src/nditeration.jl

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,40 @@ Base.length(range::NDRange) = length(blocks(range))
8080
CartesianIndex(nI)
8181
end
8282

83+
84+
"""
85+
assume(cond::Bool)
86+
87+
Assume that the condition `cond` is true. This is a hint to the compiler, possibly enabling
88+
it to optimize more aggressively.
89+
"""
90+
@inline function assume(cond::Bool)
    # The IR module declares the `llvm.assume` intrinsic and wraps it in an
    # `alwaysinline` function named `entry`. `cond` is received as an `i8` and
    # compared against 1 to produce the `i1` the intrinsic takes.
    return Base.llvmcall(
        (
            """
            declare void @llvm.assume(i1)

            define void @entry(i8) #0 {
            %cond = icmp eq i8 %0, 1
            call void @llvm.assume(i1 %cond)
            ret void
            }

            attributes #0 = { alwaysinline }""",
            "entry"
        ),
        Nothing,
        Tuple{Bool},
        cond
    )
end
101+
102+
"""
    assume_nonzero(CI::CartesianIndices)

Call [`assume`](@ref) once per dimension of `CI`, asserting to the compiler that the
`stop` of that dimension's index range is greater than zero. Returns the tuple of the
per-dimension `assume` results.
"""
@inline function assume_nonzero(CI::CartesianIndices)
    ranges = CI.indices
    return ntuple(Val(ndims(CI))) do dim
        @inline
        assume(ranges[dim].stop > 0)
    end
end
109+
83110
# Look up the Cartesian index for the linear group index `groupidx` in
# `blocks(ndrange)` and for the linear work-item index `idx` in
# `workitems(ndrange)`, then forward both to the `expand` method that takes
# Cartesian indices.
Base.@propagate_inbounds function expand(ndrange::NDRange, groupidx::Integer, idx::Integer)
    B = blocks(ndrange)
    W = workitems(ndrange)
    # The plain lookup was noted to cause an exception branch and a div
    # (presumably from the linear-to-Cartesian index conversion). Assuming that
    # every extent is positive is a compiler hint intended to avoid that branch.
    assume_nonzero(B)
    assume_nonzero(W)
    # Index the same `W` the assumption was made on, rather than calling
    # `workitems(ndrange)` a second time.
    return expand(ndrange, B[groupidx], W[idx])
end
86118

87119
Base.@propagate_inbounds function expand(ndrange::NDRange{N}, groupidx::CartesianIndex{N}, idx::Integer) where {N}

0 commit comments

Comments
 (0)