|
7 | 7 | """
|
8 | 8 |
|
9 | 9 | # Author: Nicolas Hug
|
| 10 | +import concurrent.futures |
10 | 11 |
|
11 | 12 | import numpy as np
|
12 | 13 |
|
@@ -226,22 +227,31 @@ def fit(self, X, y=None):
|
226 | 227 |
|
227 | 228 | self.missing_values_bin_idx_ = self.n_bins - 1
|
228 | 229 |
|
229 |
| - self.bin_thresholds_ = [] |
230 |
| - n_bins_non_missing = [] |
| 230 | + self.bin_thresholds_ = [None] * n_features |
| 231 | + n_bins_non_missing = [None] * n_features |
| 232 | + |
| 233 | + with concurrent.futures.ThreadPoolExecutor( |
| 234 | + max_workers=self.n_threads |
| 235 | + ) as executor: |
| 236 | + future_to_f_idx = { |
| 237 | + executor.submit(_find_binning_thresholds, X[:, f_idx], max_bins): f_idx |
| 238 | + for f_idx in range(n_features) |
| 239 | + if not self.is_categorical_[f_idx] |
| 240 | + } |
| 241 | + for future in concurrent.futures.as_completed(future_to_f_idx): |
| 242 | + f_idx = future_to_f_idx[future] |
| 243 | + self.bin_thresholds_[f_idx] = future.result() |
| 244 | + n_bins_non_missing[f_idx] = self.bin_thresholds_[f_idx].shape[0] + 1 |
231 | 245 |
|
232 | 246 | for f_idx in range(n_features):
|
233 |
| - if not self.is_categorical_[f_idx]: |
234 |
| - thresholds = _find_binning_thresholds(X[:, f_idx], max_bins) |
235 |
| - n_bins_non_missing.append(thresholds.shape[0] + 1) |
236 |
| - else: |
| 247 | + if self.is_categorical_[f_idx]: |
237 | 248 | # Since categories are assumed to be encoded in
|
238 | 249 | # [0, n_cats] and since n_cats <= max_bins,
|
239 | 250 | # the thresholds *are* the unique categorical values. This will
|
240 | 251 | # lead to the correct mapping in transform()
|
241 | 252 | thresholds = known_categories[f_idx]
|
242 |
| - n_bins_non_missing.append(thresholds.shape[0]) |
243 |
| - |
244 |
| - self.bin_thresholds_.append(thresholds) |
| 253 | + n_bins_non_missing[f_idx] = thresholds.shape[0] |
| 254 | + self.bin_thresholds_[f_idx] = thresholds |
245 | 255 |
|
246 | 256 | self.n_bins_non_missing_ = np.array(n_bins_non_missing, dtype=np.uint32)
|
247 | 257 | return self
|
|
0 commit comments