@@ -1439,111 +1439,3 @@ end
1439
1439
# r = Ref(tuple(5.0, 2.0, 1.0, 6.0))
1440
1440
# p = Base.unsafe_convert(Ptr{Float64}, r)
1441
1441
# u = unsafe_wrap(Array, p, 4)
1442
-
1443
- # ------------------------------------------------------------------------------------------------------
1444
- """
1445
- bv = isoutlier(x; critic=3.0, method::Symbol=:median, threshold=nothing, width=0.0) -> BitVector or BitMatrix
1446
-
1447
- Return a logical array whose elements are true when an outlier is detected in the corresponding element of `x`.
1448
-
1449
- If `x` is a matrix, and `width` is not used, then ``isoutlier`` operates on each column of `x` separately.
1450
- By default, an outlier is a value that is more than three median absolute deviations (MAD) from the median.
1451
- But that can be changed via the `critic` option.
1452
-
1453
- - `critic`: A number that sets the threshold for detecting outliers. The default is 3.0, which means that
1454
- a value is considered an outlier if it is more than 3 MAD from the median (when `method` is `:median`),
1455
- or more than 3 standard deviations (when `method` is `:mean`).
1456
- - `method`: The method used to calculate the threshold for outliers. It can be one of the following:
1457
- - `:median`: Uses the median absolute deviation (MAD) method. Outliers are defined as elements more than
1458
- `critic` MAD from the median.
1459
- - `:mean`: Uses the mean and standard deviation method. Outliers are defined as elements more than `critic`
1460
- standard deviations from the mean. This method is faster but less robust than "median".
1461
- - `:quartiles`: Uses the interquartile range (IQR) method. Outliers are defined as elements more than 1.5
1462
- interquartile ranges above the upper quartile (75 percent) or below the lower quartile (25 percent).
1463
- This method is useful when the data in `x` is not normally distributed.
1464
- - `threshold`: Is an alternative to the `method` option. It specifies the percentile thresholds, given as a
1465
- two-element array whose elements are in the interval [0, 100]. The first element indicates the lower percentile
1466
- threshold, and the second element indicates the upper percentile threshold. It can also be a single number
1467
- between (0, 100), which is interpreted as the percentage of end member points that are considered outliers.
1468
- For example, `threshold=1` means that the lower and upper thresholds are the 1th and 99th percentiles.
1469
- - `width`: If this option is used (only when `x` is a Matrix or a GMTdataset) we detect local outliers using
1470
- a moving window method with window length ``width``. This length is given in the same units as the input
1471
- data stored in first column of `x`.
1472
-
1473
- ### Example:
1474
- ```julia
1475
- x = [57, 59, 60, 100, 59, 58, 57, 58, 300, 61, 62, 60, 62, 58, 57];
1476
- findall(isoutlier(x))
1477
- 2-element Vector{Int64}:
1478
- 4
1479
- 9
1480
- ```
1481
-
1482
- ```julia
1483
- x = -50.0:50; y = x / 50 .+ 3 .+ 0.25 * rand(length(x));
1484
- y[[30,50,60]] = [4,-3,6]; # Add 3 outliers
1485
- findall(isoutlier([x y], width=5))
1486
- ```
1487
- """
1488
- function isoutlier (x:: AbstractVector{<:Real} ; critic= 3.0 , method:: Symbol = :median , threshold= nothing )
1489
- method in [:median , :mean , :quartiles ] || throw (ArgumentError (" Unknown method: $method . Use :median, :mean or :quartiles" ))
1490
- if (threshold != = nothing )
1491
- ! (isa (threshold, VecOrMat{<: Real }) || isa (threshold, Real)) &&
1492
- throw (ArgumentError (" Threshold: $threshold must be a scalar or a 2 elements array." ))
1493
- if (isa (threshold, VecOrMat{<: Real }))
1494
- @assert ! (length (threshold) == 2 && threshold[1 ] >= 0.0 && threshold[2 ] <= 100 ) " Threshold must be a 2 elements array in the [0 100] interval"
1495
- _thresh = [Float64 (threshold[1 ])/ 100 , Float64 (threshold[2 ])/ 100 ] # Make it [0 1]
1496
- else
1497
- @assert threshold > 0.0 && threshold < 100 " When threshold is a scalar, it must be in the ]0 100[ interval."
1498
- _thresh = [Float64 (threshold)/ 100 , Float64 (100 - threshold)/ 100 ] # Make it [0 1]
1499
- end
1500
- method = :threshold
1501
- end
1502
-
1503
- bv = BitVector (undef, length (x))
1504
- if (method == :median )
1505
- _mad, _med = mad (x); _mad *= critic
1506
- @inbounds for k in eachindex (x)
1507
- bv[k] = abs (x[k] - _med) > _mad
1508
- end
1509
- elseif (method == :mean )
1510
- _mean, _std = nanmean (x), nanstd (x) * critic;
1511
- @inbounds for k in eachindex (x)
1512
- bv[k] = abs (x[k] - _mean) > _std
1513
- end
1514
- elseif (method == :threshold )
1515
- q_l, q_u = quantile (x, _thresh)
1516
- @inbounds for k in eachindex (x)
1517
- bv[k] = (x[k] < q_l) || (x[k] > q_u)
1518
- end
1519
- else # method == :quartiles
1520
- q25, q75 = quantile (x, [0.25 , 0.75 ])
1521
- crit = (q75 - q25) * 1.5
1522
- q_minus, q_plus = q25 - crit, q75 + crit
1523
- @inbounds for k in eachindex (x)
1524
- bv[k] = (x[k] < q_minus) || (x[k] > q_plus)
1525
- end
1526
- end
1527
- return bv
1528
- end
1529
-
1530
- function isoutlier (mat:: AbstractMatrix{<:Real} ; critic= 3.0 , method:: Symbol = :median , threshold= nothing , width= 0.0 )
1531
- width > 0 && return isoutlier (mat2ds (mat), critic= critic, method= method, threshold= threshold, width= width)
1532
- bm = BitMatrix (undef, size (mat))
1533
- for k = 1 : size (mat,2 )
1534
- bm[:,k] .= isoutlier (view (mat, :, k), critic= critic, method= method, threshold= threshold)
1535
- end
1536
- return bm
1537
- end
1538
-
1539
- function isoutlier (D:: GMTdataset{<:Real,2} ; critic= 3.0 , method:: Symbol = :median , threshold= nothing , width= 0.0 )
1540
- (threshold === nothing && width <= 0 ) && error (" Must provide the window length of via the `width` option or use the `threshold` option." )
1541
- if (width > 0 )
1542
- method in [:median , :mean ] || throw (ArgumentError (" Unknown method: $method . Here use only one of :median or :mean" ))
1543
- (method == :mean ) && (method = boxcar) # :boxcar is the name in GMT for 'mean'
1544
- Dres = filter1d (D, filter= (type= method, width= width, highpass= true ), E= true )
1545
- isoutlier (view (Dres. data, :, 2 ), critic= critic, method= method, threshold= threshold)
1546
- else
1547
- isoutlier (view (D. data, :, 2 ), critic= critic, method= method, threshold= threshold)
1548
- end
1549
- end
0 commit comments