@@ -84,3 +84,241 @@ cdef object _rand_{{nptype}}(low, high, size, aug_state *state, lock):
84
84
85
85
{{endfor}}
86
86
87
+
88
+ cdef inline uint64_t _gen_mask(uint64_t max_val) nogil:
89
+ # Smallest bit mask >= max
90
+ cdef uint64_t mask = max_val
91
+ mask |= mask >> 1
92
+ mask |= mask >> 2
93
+ mask |= mask >> 4
94
+ mask |= mask >> 8
95
+ mask |= mask >> 16
96
+ mask |= mask >> 32
97
+ return mask
98
+
99
+ {{
100
+ py:
101
+ bc_ctypes = (('uint32', 'uint32', 'uint64', 'NPY_UINT64', 0, 0, 0, '0X100000000ULL'),
102
+ ('uint16', 'uint16', 'uint32', 'NPY_UINT32', 1, 16, 0, '0X10000UL'),
103
+ ('uint8', 'uint8', 'uint16', 'NPY_UINT16', 3, 8, 0, '0X100UL'),
104
+ ('bool','uint8', 'uint8', 'NPY_UINT8', 31, 1, 0, '0x2UL'),
105
+ ('int32', 'uint32', 'uint64', 'NPY_INT64', 0, 0, '-0x80000000LL', '0x80000000LL'),
106
+ ('int16', 'uint16', 'uint32', 'NPY_INT32', 1, 16, '-0x8000LL', '0x8000LL' ),
107
+ ('int8', 'uint8', 'uint16', 'NPY_INT16', 3, 8, '-0x80LL', '0x80LL' ),
108
+ )}}
109
+
110
+ {{for nptype, utype, nptype_up, npctype, remaining, bitshift, lb, ub in bc_ctypes}}
111
+
112
+ {{ py: otype = nptype + '_' if nptype == 'bool' else nptype }}
113
+
114
+ cdef object _rand_{{nptype}}_broadcast(object low, object high, object size, aug_state *state, object lock):
115
+ """
116
+ Generate bounded random {{nptype}} values using broadcasting
117
+
118
+ Parameters
119
+ ----------
120
+ low : int or array-like
121
+ Array containing the lowest (signed) integers to be drawn from the
122
+ distribution.
123
+ high : int or array-like
124
+ Array containing the the open interval bound for the distribution.
125
+ size : int or tuple of ints
126
+ Output shape. If the given shape is, e.g., ``(m, n, k)``, then
127
+ ``m * n * k`` samples are drawn. Default is None, in which case
128
+ the output shape is determined by the broadcast shapes of low and
129
+ high
130
+ state : augmented random state
131
+ State to use in the core random number generators
132
+ lock : threading.Lock
133
+ Lock to prevent multiple using a single RandomState simultaneously
134
+
135
+ Returns
136
+ -------
137
+ out : ndarray of np.{{nptype}}
138
+ array of random integers from the appropriate distribution where the
139
+ size is determined by size if provided or the broadcast shape of low
140
+ and high
141
+ """
142
+ cdef np.ndarray low_arr, high_arr, out_arr
143
+ cdef np.npy_intp i
144
+ cdef np.broadcast it
145
+ cdef int buf_rem = 0
146
+
147
+ cdef {{utype}}_t *out_data
148
+ cdef {{utype}}_t val, mask, off
149
+ cdef {{nptype_up}}_t rng, last_rng, low_v, high_v
150
+ cdef uint32_t buf
151
+
152
+ # TODO: Direct error check? Probably not
153
+ # TODO: Make constant?
154
+ low_arr = <np.ndarray>low
155
+ high_arr = <np.ndarray>high
156
+ if np.any(np.less(low_arr, {{lb}})):
157
+ raise ValueError('low is out of bounds for {{nptype}}')
158
+ if np.any(np.greater(high_arr, {{ub}})):
159
+ raise ValueError('high is out of bounds for {{nptype}}')
160
+ if np.any(np.greater_equal(low_arr, high_arr)):
161
+ raise ValueError('low >= high')
162
+
163
+ low_arr = <np.ndarray>np.PyArray_FROM_OTF(low, np.{{npctype}}, np.NPY_ALIGNED | np.NPY_FORCECAST)
164
+ high_arr = <np.ndarray>np.PyArray_FROM_OTF(high, np.{{npctype}}, np.NPY_ALIGNED | np.NPY_FORCECAST)
165
+
166
+ if size is not None:
167
+ out_arr = <np.ndarray>np.empty(size, np.{{otype}})
168
+ else:
169
+ it = np.PyArray_MultiIterNew2(low_arr, high_arr)
170
+ out_arr = <np.ndarray>np.empty(it.shape, np.{{otype}})
171
+
172
+ it = np.PyArray_MultiIterNew3(low_arr, high_arr, out_arr)
173
+ out_data = <{{utype}}_t *>np.PyArray_DATA(out_arr)
174
+ n = np.PyArray_SIZE(out_arr)
175
+ mask = last_rng = 0
176
+ with lock, nogil:
177
+ for i in range(n):
178
+ low_v = (<{{nptype_up}}_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
179
+ high_v = (<{{nptype_up}}_t*>np.PyArray_MultiIter_DATA(it, 1))[0]
180
+ rng = (high_v - 1) - low_v
181
+ off = <{{utype}}_t>(<{{nptype_up}}_t>low_v)
182
+ if rng == 0:
183
+ out_data[i] = 0
184
+ continue
185
+
186
+ if rng != last_rng:
187
+ # TODO: Is this too much of an optimization? is it worth it?
188
+ # Smallest bit mask >= max
189
+ mask = <{{utype}}_t>_gen_mask(rng)
190
+
191
+ while True:
192
+ if not buf_rem:
193
+ buf = random_uint32(state)
194
+ buf_rem = {{remaining}}
195
+ else:
196
+ buf >>= {{bitshift}}
197
+ buf_rem -= 1
198
+ val = <{{utype}}_t>buf & mask
199
+ if val <= rng:
200
+ break
201
+ out_data[i] = off + val
202
+
203
+ np.PyArray_MultiIter_NEXT(it)
204
+
205
+ return out_arr
206
+
207
+ {{endfor}}
208
+
209
+
210
+ {{
211
+ py:
212
+ big_bc_ctypes = (('uint64', 'uint64', 'NPY_UINT64', '0x0ULL', '0xFFFFFFFFFFFFFFFFULL'),
213
+ ('int64', 'uint64', 'NPY_INT64', '-0x8000000000000000LL', '0x7FFFFFFFFFFFFFFFLL' )
214
+ )}}
215
+
216
+ {{for nptype, utype, npctype, lb, ub in big_bc_ctypes}}
217
+
218
+ {{ py: otype = nptype}}
219
+
220
+ cdef object _rand_{{nptype}}_broadcast(object low, object high, object size, aug_state *state, object lock):
221
+ """
222
+ Generate bounded random {{nptype}} values using broadcasting
223
+
224
+ Parameters
225
+ ----------
226
+ low : int or array-like
227
+ Array containing the lowest (signed) integers to be drawn from the
228
+ distribution.
229
+ high : int or array-like
230
+ Array containing the the open interval bound for the distribution.
231
+ size : int or tuple of ints
232
+ Output shape. If the given shape is, e.g., ``(m, n, k)``, then
233
+ ``m * n * k`` samples are drawn. Default is None, in which case
234
+ the output shape is determined by the broadcast shapes of low and
235
+ high
236
+ state : augmented random state
237
+ State to use in the core random number generators
238
+ lock : threading.Lock
239
+ Lock to prevent multiple using a single RandomState simultaneously
240
+
241
+ Returns
242
+ -------
243
+ out : ndarray of np.{{nptype}}
244
+ array of random integers from the appropriate distribution where the
245
+ size is determined by size if provided or the broadcast shape of low
246
+ and high
247
+ """
248
+ cdef np.ndarray low_arr, high_arr, out_arr, highm1_arr
249
+ cdef np.npy_intp i
250
+ cdef np.broadcast it
251
+ cdef int buf_rem = 0
252
+ cdef object closed_upper
253
+
254
+ cdef uint64_t *out_data
255
+ cdef {{nptype}}_t *highm1_data
256
+ cdef {{nptype}}_t low_v, high_v
257
+ cdef uint64_t rng, last_rng, val, mask, off
258
+
259
+ low_arr = <np.ndarray>low
260
+ high_arr = <np.ndarray>high
261
+
262
+ if np.any(np.less(low_arr, {{lb}})):
263
+ raise ValueError('low is out of bounds for {{nptype}}')
264
+
265
+ highm1_arr = <np.ndarray>np.empty_like(high_arr, dtype=np.{{nptype}})
266
+ highm1_data = <{{nptype}}_t *>np.PyArray_DATA(highm1_arr)
267
+ n = np.PyArray_SIZE(high_arr)
268
+ flat = high_arr.flat
269
+ for i in range(n):
270
+ closed_upper = int(flat[i]) - 1
271
+ if closed_upper > {{ub}}:
272
+ raise ValueError('high is out of bounds for {{nptype}}')
273
+ if closed_upper < {{lb}}:
274
+ raise ValueError('low >= high')
275
+
276
+ highm1_data[i] = <{{nptype}}_t>closed_upper
277
+
278
+ if np.any(np.greater(low_arr, highm1_arr)):
279
+ raise ValueError('low >= high')
280
+
281
+ high_arr = highm1_arr
282
+ low_arr = <np.ndarray>np.PyArray_FROM_OTF(low, np.{{npctype}}, np.NPY_ALIGNED | np.NPY_FORCECAST)
283
+
284
+ if size is not None:
285
+ out_arr = <np.ndarray>np.empty(size, np.{{nptype}})
286
+ else:
287
+ it = np.PyArray_MultiIterNew2(low_arr, high_arr)
288
+ out_arr = <np.ndarray>np.empty(it.shape, np.{{nptype}})
289
+
290
+ it = np.PyArray_MultiIterNew3(low_arr, high_arr, out_arr)
291
+ out_data = <uint64_t *>np.PyArray_DATA(out_arr)
292
+ n = np.PyArray_SIZE(out_arr)
293
+ mask = last_rng = 0
294
+ with lock, nogil:
295
+ for i in range(n):
296
+ low_v = (<{{nptype}}_t*>np.PyArray_MultiIter_DATA(it, 0))[0]
297
+ high_v = (<{{nptype}}_t*>np.PyArray_MultiIter_DATA(it, 1))[0]
298
+ rng = <{{nptype}}_t>(high_v - low_v) # No -1 here since implemented above
299
+ off = low_v
300
+ if rng == 0:
301
+ out_data[i] = 0
302
+ continue
303
+
304
+ if rng != last_rng:
305
+ mask = <uint64_t>_gen_mask(rng)
306
+
307
+ if rng <= 0xFFFFFFFFULL:
308
+ while True:
309
+ val = random_uint32(state) & mask
310
+ if val <= rng:
311
+ break
312
+ else:
313
+ while True:
314
+ val = random_uint64(state) & mask
315
+ if val <= rng:
316
+ break
317
+
318
+ out_data[i] = off + val
319
+
320
+ np.PyArray_MultiIter_NEXT(it)
321
+
322
+ return out_arr
323
+
324
+ {{endfor}}
0 commit comments