|
1 | 1 | from typing import Union, List
|
2 | 2 | import numpy as np
|
| 3 | +from numpy.typing import ArrayLike |
3 | 4 | from scipy.stats import t
|
4 | 5 |
|
5 | 6 |
|
@@ -112,8 +113,7 @@ def outliers_grubbs(
|
112 | 113 | G = val / np.std(arr, ddof=1)
|
113 | 114 | N = len(arr)
|
114 | 115 | result = G > (N - 1) / np.sqrt(N) * np.sqrt(
|
115 |
| - (t.ppf(1 - alpha / (2 * N), N - 2) ** 2) |
116 |
| - / (N - 2 + t.ppf(1 - alpha / (2 * N), N - 2) ** 2) |
| 116 | + (t.ppf(1 - alpha / (2 * N), N - 2) ** 2) / (N - 2 + t.ppf(1 - alpha / (2 * N), N - 2) ** 2) |
117 | 117 | )
|
118 | 118 |
|
119 | 119 | if hypo:
|
@@ -209,7 +209,7 @@ def tietjen(x_, k_):
|
209 | 209 |
|
210 | 210 |
|
211 | 211 | def outliers_gesd(
|
212 |
| - x: Union[List, np.ndarray], |
| 212 | + x: ArrayLike, |
213 | 213 | outliers: int = 5,
|
214 | 214 | hypo: bool = False,
|
215 | 215 | report: bool = False,
|
@@ -245,8 +245,8 @@ def outliers_gesd(
|
245 | 245 | Returns
|
246 | 246 | -------
|
247 | 247 | np.ndarray
|
248 |
| - Returns the filtered array if alternative hypo is True, otherwise an |
249 |
| - unfiltered (input) array. |
| 248 | + If hypo is True, returns a boolean array where True indicates an outlier. |
| 249 | + If hypo is False, returns the filtered array with outliers removed. |
250 | 250 |
|
251 | 251 | Notes
|
252 | 252 | -----
|
@@ -308,7 +308,7 @@ def outliers_gesd(
|
308 | 308 |
|
309 | 309 | # Masked values
|
310 | 310 | lms = ms[-1] if len(ms) > 0 else []
|
311 |
| - ms.append(lms + np.where(data == data_proc[np.argmax(abs_d)])[0].tolist()) |
| 311 | + ms.append(lms + [np.where(data == data_proc[np.argmax(abs_d)])[0][0]]) |
312 | 312 |
|
313 | 313 | # Remove the observation that maximizes |xi − xmean|
|
314 | 314 | data_proc = np.delete(data_proc, np.argmax(abs_d))
|
@@ -341,16 +341,12 @@ def outliers_gesd(
|
341 | 341 | # Remove masked values
|
342 | 342 | # for which the test statistic is greater
|
343 | 343 | # than the critical value and return the result
|
344 |
| - |
345 |
| - if any(rs > ls): |
346 |
| - if hypo: |
347 |
| - data[:] = False |
| 344 | + if hypo: |
| 345 | + data = np.zeros(n, dtype=bool) |
| 346 | + if any(rs > ls): |
348 | 347 | data[ms[np.max(np.where(rs > ls))]] = True
|
349 |
| - # rearrange data so mask is in same order as incoming data |
350 |
| - data = np.vstack((data, np.arange(0, data.shape[0])[argsort_index])) |
351 |
| - data = data[0, data.argsort()[1,]] |
352 |
| - data = data.astype("bool") |
353 |
| - else: |
354 |
| - data = np.delete(data, ms[np.max(np.where(rs > ls))]) |
355 |
| - |
356 |
| - return data |
| 348 | + return data |
| 349 | + else: |
| 350 | + if any(rs > ls): |
| 351 | + return np.delete(data, ms[np.max(np.where(rs > ls))]) |
| 352 | + return data |
0 commit comments