@@ -80,8 +80,40 @@ Base.length(range::NDRange) = length(blocks(range))
80
80
CartesianIndex (nI)
81
81
end
82
82
83
+
84
"""
    assume(cond::Bool)

Assume that the condition `cond` is true. This is a hint to the compiler, possibly enabling
it to optimize more aggressively.

The hint is emitted through the `llvm.assume` intrinsic and the function returns `nothing`.
The caller is responsible for `cond` actually being true: per the LLVM language reference,
violating an assumption results in undefined behavior, so never use this to validate input.
"""
@inline assume(cond::Bool) = Base.llvmcall(("""
    declare void @llvm.assume(i1)

    define void @entry(i8) #0 {
        %cond = icmp eq i8 %0, 1
        call void @llvm.assume(i1 %cond)
        ret void
    }

    attributes #0 = { alwaysinline }""", "entry"),  # must name the `@entry` function above
    Nothing, Tuple{Bool}, cond)
101
+
102
# Emit an `assume` hint, for every dimension of `CI`, that the `stop` field of that
# dimension's index range is greater than zero. Returns the tuple of the per-dimension
# `assume` results.
@inline function assume_nonzero(CI::CartesianIndices)
    per_dim = CI.indices
    return ntuple(Val(ndims(CI))) do dim
        @inline
        assume(per_dim[dim].stop > 0)
    end
end
109
+
83
110
# Expand a linear group index and a linear work-item index by converting both to
# Cartesian indices and forwarding to the `expand` method that takes those.
Base.@propagate_inbounds function expand(ndrange::NDRange, groupidx::Integer, idx::Integer)
    # NOTE(review): the linear indexing below was observed to cause an exception branch
    # and a div; the positivity hints are presumably meant to let the compiler simplify
    # that code -- confirm against the generated IR.
    B = blocks(ndrange)
    W = workitems(ndrange)
    assume_nonzero(B)
    assume_nonzero(W)
    # Index the same `B` and `W` values the hints were emitted for.
    return expand(ndrange, B[groupidx], W[idx])
end
86
118
87
119
Base. @propagate_inbounds function expand (ndrange:: NDRange{N} , groupidx:: CartesianIndex{N} , idx:: Integer ) where {N}
0 commit comments