Skip to content

Commit 789d9a6

Browse files
authored
Move the isoutlier function/methods to a separate file. (GenericMappingTools#1710)
Move more files to "extras"
1 parent c9d8e0c commit 789d9a6

File tree

9 files changed

+134
-114
lines changed

9 files changed

+134
-114
lines changed

src/GMT.jl

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,6 @@ include("grdview.jl")
264264
include("grdvolume.jl")
265265
include("greenspline.jl")
266266
include("gridit.jl")
267-
include("hampel_outliers.jl")
268267
include("img_funs.jl")
269268
include("imgtiles.jl")
270269
include("imshow.jl")
@@ -323,10 +322,14 @@ include("xyz2grd.jl")
323322
include("utils.jl")
324323
include("utils_project.jl")
325324
include("choropleth_utils.jl")
326-
include("webmapserver.jl")
325+
include("extras/hampel_outliers.jl")
326+
include("extras/isoutlier.jl")
327+
include("extras/lowess.jl")
327328
include("extras/seismicity.jl")
328329
include("extras/okada.jl")
329330
include("extras/weather.jl")
331+
include("extras/webmapserver.jl")
332+
include("extras/whittaker.jl")
330333
include("seis/psmeca.jl")
331334
include("seis/gmtisf.jl")
332335
include("geodesy/psvelo.jl")
@@ -343,8 +346,6 @@ include("potential/gravfft.jl")
343346
include("potential/grdseamount.jl")
344347
include("spotter/grdrotater.jl")
345348
include("windbarbs/windbarbs.jl")
346-
include("lowess.jl")
347-
include("whittaker.jl")
348349
include("zscale.jl")
349350
include("drawing.jl")
350351
include("get_enums.jl")

src/common_options.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3789,7 +3789,7 @@ function _read_data(d::Dict, cmd::String, arg, opt_R::String="", is3D::Bool=fals
37893789
(t_col == 0) ? (s[1] *= cT; s[2] *= cT) : (t_col == 1) ? (s[3] *= cT; s[4] *= cT) : (t_col == 2 && length(s) == 6) ? (s[5] *= cT; s[6] *= cT) : error("-f option not cmpatible with -R")
37903790
opt_R = join(s, "/") # Reconstruct the -R with the T's in the time axis
37913791
# Remove the --TIME_UNIT from the command due to a GMT bug. But do it only when using the default -B. VERY RISKY. If fails => GB'
3792-
#(contains(cmd, DEF_FIG_AXES_BAK) && ((TU = scan_opt(cmd, "--TIME_UNIT=")) != "")) && (cmd = replace(cmd, " --TIME_UNIT="*TU => ""))
3792+
(contains(cmd, DEF_FIG_AXES_BAK) && ((TU = scan_opt(cmd, "--TIME_UNIT=")) == "y")) && (cmd = replace(cmd, " --TIME_UNIT="*TU => ""))
37933793
#((TU = scan_opt(cmd, "--TIME_UNIT=")) != "") && (cmd = replace(cmd, " --TIME_UNIT="*TU => ""))
37943794
#((TU = scan_opt(cmd, "--TIME_EPOCH=")) != "") && (cmd = replace(cmd, " --TIME_EPOCH="*TU => ""))
37953795
end
File renamed without changes.

src/extras/isoutlier.jl

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
"""
2+
bv = isoutlier(x; critic=3.0, method::Symbol=:median, threshold=nothing, width=0.0) -> Union{BitVector, BitMatrix}
3+
4+
Return a logical array whose elements are true when an outlier is detected in the corresponding element of `x`.
5+
6+
If `x` is a matrix, and `width` is not used, then ``isoutlier`` operates on each column of `x` separately.
7+
By default, an outlier is a value that is more than three median absolute deviations (MAD) from the median.
8+
But that can be changed via the `critic` option.
9+
10+
- `critic`: A number that sets the threshold for detecting outliers. The default is 3.0, which means that
11+
a value is considered an outlier if it is more than 3 MAD from the median (when `method` is `:median`),
12+
or more than 3 standard deviations (when `method` is `:mean`).
13+
- `method`: The method used to calculate the threshold for outliers. It can be one of the following:
14+
- `:median`: Uses the median absolute deviation (MAD) method. Outliers are defined as elements more than
15+
`critic` MAD from the median.
16+
- `:mean`: Uses the mean and standard deviation method. Outliers are defined as elements more than `critic`
17+
standard deviations from the mean. This method is faster but less robust than "median".
18+
- `:quartiles`: Uses the interquartile range (IQR) method. Outliers are defined as elements more than 1.5
19+
interquartile ranges above the upper quartile (75 percent) or below the lower quartile (25 percent).
20+
This method is useful when the data in `x` is not normally distributed.
21+
- `threshold`: Is an alternative to the `method` option. It specifies the percentile thresholds, given as a
22+
two-element array whose elements are in the interval [0, 100]. The first element indicates the lower percentile
23+
threshold, and the second element indicates the upper percentile threshold. It can also be a single number
24+
between (0, 100), which is interpreted as the percentage of end member points that are considered outliers.
25+
For example, `threshold=1` means that the lower and upper thresholds are the 1th and 99th percentiles.
26+
- `width`: If this option is used (only when `x` is a Matrix or a GMTdataset) we detect local outliers using
27+
a moving window method with window length ``width``. This length is given in the same units as the input
28+
data stored in first column of `x`.
29+
30+
### Example:
31+
```julia
32+
x = [57, 59, 60, 100, 59, 58, 57, 58, 300, 61, 62, 60, 62, 58, 57];
33+
findall(isoutlier(x))
34+
2-element Vector{Int64}:
35+
4
36+
9
37+
```
38+
39+
```julia
40+
x = -50.0:50; y = x / 50 .+ 3 .+ 0.25 * rand(length(x));
41+
y[[30,50,60]] = [4,-3,6]; # Add 3 outliers
42+
findall(isoutlier([x y], width=5))
43+
```
44+
"""
45+
function isoutlier(x::AbstractVector{<:Real}; critic=3.0, method::Symbol=:median, threshold=nothing)
46+
method in [:median, :mean, :quartiles] || throw(ArgumentError("Unknown method: $method. Use :median, :mean or :quartiles"))
47+
if (threshold !== nothing)
48+
!(isa(threshold, VecOrMat{<:Real}) || isa(threshold, Real)) &&
49+
throw(ArgumentError("Threshold: $threshold must be a scalar or a 2 elements array."))
50+
if (isa(threshold, VecOrMat{<:Real}))
51+
@assert !(length(threshold) == 2 && threshold[1] >= 0.0 && threshold[2] <= 100) "Threshold must be a 2 elements array in the [0 100] interval"
52+
_thresh = [Float64(threshold[1])/100, Float64(threshold[2])/100] # Make it [0 1]
53+
else
54+
@assert threshold > 0.0 && threshold < 100 "When threshold is a scalar, it must be in the ]0 100[ interval."
55+
_thresh = [Float64(threshold)/100, Float64(100 - threshold)/100] # Make it [0 1]
56+
end
57+
method = :threshold
58+
end
59+
60+
bv = BitVector(undef, length(x))
61+
if (method == :median)
62+
_mad, _med = mad(x); _mad *= critic
63+
@inbounds for k in eachindex(x)
64+
bv[k] = abs(x[k] - _med) > _mad
65+
end
66+
elseif (method == :mean)
67+
_mean, _std = nanmean(x), nanstd(x) * critic;
68+
@inbounds for k in eachindex(x)
69+
bv[k] = abs(x[k] - _mean) > _std
70+
end
71+
elseif (method == :threshold)
72+
q_l, q_u = quantile(x, _thresh)
73+
@inbounds for k in eachindex(x)
74+
bv[k] = (x[k] < q_l) || (x[k] > q_u)
75+
end
76+
else # method == :quartiles
77+
q25, q75 = quantile(x, [0.25, 0.75])
78+
crit = (q75 - q25) * 1.5
79+
q_minus, q_plus = q25 - crit, q75 + crit
80+
@inbounds for k in eachindex(x)
81+
bv[k] = (x[k] < q_minus) || (x[k] > q_plus)
82+
end
83+
end
84+
return bv
85+
end
86+
87+
function isoutlier(mat::AbstractMatrix{<:Real}; critic=3.0, method::Symbol=:median, threshold=nothing, width=0.0)
88+
width > 0 && return isoutlier(mat2ds(mat), critic=critic, method=method, threshold=threshold, width=width)
89+
bm = BitMatrix(undef, size(mat))
90+
for k = 1:size(mat,2)
91+
bm[:,k] .= isoutlier(view(mat, :, k), critic=critic, method=method, threshold=threshold)
92+
end
93+
return bm
94+
end
95+
96+
function isoutlier(D::GMTdataset{<:Real,2}; critic=3.0, method::Symbol=:median, threshold=nothing, width=0.0)
97+
(threshold === nothing && width <= 0) && error("Must provide the window length of via the `width` option or use the `threshold` option.")
98+
if (width > 0)
99+
method in [:median, :mean] || throw(ArgumentError("Unknown method: $method. Here use only one of :median or :mean"))
100+
(method == :mean) && (method = boxcar) # :boxcar is the name in GMT for 'mean'
101+
Dres = filter1d(D, filter=(type=method, width=width, highpass=true), E=true)
102+
isoutlier(view(Dres.data, :, 2), critic=critic, method=method, threshold=threshold)
103+
else
104+
isoutlier(view(D.data, :, 2), critic=critic, method=method, threshold=threshold)
105+
end
106+
end
File renamed without changes.
File renamed without changes.
File renamed without changes.

src/trend1d.jl

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ function trend1d(cmd0::String="", arg1=nothing; kwargs...)
4343

4444
cmd, = parse_common_opts(d, "", [:V_params :b :d :e :f :h :i :w :yx])
4545
cmd = parse_these_opts(cmd, d, [[:C :condition_number], [:I :conf_level :confidence_level], [:F :out :output], [:W :weights]])
46+
opt_F = scan_opt(cmd, "-F")
4647
((val = find_in_dict(d, [:N :model], false)[1]) === nothing) && error("The option 'model' must be specified")
4748
if (isa(val, Tuple) && isa(val[1], NamedTuple))
4849
# Complicated case here -- A mixed model ---. So input must be a Tuple of NamedTuples, and the +l+o+r options
@@ -68,7 +69,27 @@ function trend1d(cmd0::String="", arg1=nothing; kwargs...)
6869
(tryparse(Int, opt_N[4:end]) !== nothing) && (opt_N = " -Np" * opt_N[4:end]) # -N2 is old syntax and will error GMT
6970
cmd *= opt_N
7071

71-
common_grd(d, cmd0, cmd, "trend1d ", arg1) # Finish build cmd and run it
72+
R = common_grd(d, cmd0, cmd, "trend1d ", arg1) # Finish build cmd and run it
73+
!isa(R, GDtype) && return R # Should be the output of Vd=2
74+
75+
if (opt_F != "") # Extract column names from opt_F
76+
if (opt_F[1] == 'p' || opt_F[1] == 'P' || opt_F[1] == 'c')
77+
colnames = ["a$i" for i=0:size(D,2)-1]
78+
else
79+
s = collect(opt_F)
80+
colnames = Vector{String}(undef, length(s))
81+
for k = 1:numel(s)
82+
if (s[k] == 'x') colnames[k] = "x"
83+
elseif (s[k] == 'y') colnames[k] = "y"
84+
elseif (s[k] == 'm') colnames[k] = "model"
85+
elseif (s[k] == 'r') colnames[k] = "residues"
86+
elseif (s[k] == 'w') colnames[k] = "weights"
87+
end
88+
end
89+
end
90+
isa(R, GMTdataset) ? (R.colnames = colnames) : (R[1].colnames = colnames)
91+
end
92+
return R
7293
end
7394

7495
# ---------------------------------------------------------------------------------------------------

src/utils.jl

Lines changed: 0 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -1439,111 +1439,3 @@ end
14391439
#r = Ref(tuple(5.0, 2.0, 1.0, 6.0))
14401440
#p = Base.unsafe_convert(Ptr{Float64}, r)
14411441
#u = unsafe_wrap(Array, p, 4)
1442-
1443-
# ------------------------------------------------------------------------------------------------------
1444-
"""
1445-
bv = isoutlier(x; critic=3.0, method::Symbol=:median, threshold=nothing, width=0.0) -> BitVector or BitMatrix
1446-
1447-
Return a logical array whose elements are true when an outlier is detected in the corresponding element of `x`.
1448-
1449-
If `x` is a matrix, and `width` is not used, then ``isoutlier`` operates on each column of `x` separately.
1450-
By default, an outlier is a value that is more than three median absolute deviations (MAD) from the median.
1451-
But that can be changed via the `critic` option.
1452-
1453-
- `critic`: A number that sets the threshold for detecting outliers. The default is 3.0, which means that
1454-
a value is considered an outlier if it is more than 3 MAD from the median (when `method` is `:median`),
1455-
or more than 3 standard deviations (when `method` is `:mean`).
1456-
- `method`: The method used to calculate the threshold for outliers. It can be one of the following:
1457-
- `:median`: Uses the median absolute deviation (MAD) method. Outliers are defined as elements more than
1458-
`critic` MAD from the median.
1459-
- `:mean`: Uses the mean and standard deviation method. Outliers are defined as elements more than `critic`
1460-
standard deviations from the mean. This method is faster but less robust than "median".
1461-
- `:quartiles`: Uses the interquartile range (IQR) method. Outliers are defined as elements more than 1.5
1462-
interquartile ranges above the upper quartile (75 percent) or below the lower quartile (25 percent).
1463-
This method is useful when the data in `x` is not normally distributed.
1464-
- `threshold`: Is an alternative to the `method` option. It specifies the percentile thresholds, given as a
1465-
two-element array whose elements are in the interval [0, 100]. The first element indicates the lower percentile
1466-
threshold, and the second element indicates the upper percentile threshold. It can also be a single number
1467-
between (0, 100), which is interpreted as the percentage of end member points that are considered outliers.
1468-
For example, `threshold=1` means that the lower and upper thresholds are the 1th and 99th percentiles.
1469-
- `width`: If this option is used (only when `x` is a Matrix or a GMTdataset) we detect local outliers using
1470-
a moving window method with window length ``width``. This length is given in the same units as the input
1471-
data stored in first column of `x`.
1472-
1473-
### Example:
1474-
```julia
1475-
x = [57, 59, 60, 100, 59, 58, 57, 58, 300, 61, 62, 60, 62, 58, 57];
1476-
findall(isoutlier(x))
1477-
2-element Vector{Int64}:
1478-
4
1479-
9
1480-
```
1481-
1482-
```julia
1483-
x = -50.0:50; y = x / 50 .+ 3 .+ 0.25 * rand(length(x));
1484-
y[[30,50,60]] = [4,-3,6]; # Add 3 outliers
1485-
findall(isoutlier([x y], width=5))
1486-
```
1487-
"""
1488-
function isoutlier(x::AbstractVector{<:Real}; critic=3.0, method::Symbol=:median, threshold=nothing)
1489-
method in [:median, :mean, :quartiles] || throw(ArgumentError("Unknown method: $method. Use :median, :mean or :quartiles"))
1490-
if (threshold !== nothing)
1491-
!(isa(threshold, VecOrMat{<:Real}) || isa(threshold, Real)) &&
1492-
throw(ArgumentError("Threshold: $threshold must be a scalar or a 2 elements array."))
1493-
if (isa(threshold, VecOrMat{<:Real}))
1494-
@assert !(length(threshold) == 2 && threshold[1] >= 0.0 && threshold[2] <= 100) "Threshold must be a 2 elements array in the [0 100] interval"
1495-
_thresh = [Float64(threshold[1])/100, Float64(threshold[2])/100] # Make it [0 1]
1496-
else
1497-
@assert threshold > 0.0 && threshold < 100 "When threshold is a scalar, it must be in the ]0 100[ interval."
1498-
_thresh = [Float64(threshold)/100, Float64(100 - threshold)/100] # Make it [0 1]
1499-
end
1500-
method = :threshold
1501-
end
1502-
1503-
bv = BitVector(undef, length(x))
1504-
if (method == :median)
1505-
_mad, _med = mad(x); _mad *= critic
1506-
@inbounds for k in eachindex(x)
1507-
bv[k] = abs(x[k] - _med) > _mad
1508-
end
1509-
elseif (method == :mean)
1510-
_mean, _std = nanmean(x), nanstd(x) * critic;
1511-
@inbounds for k in eachindex(x)
1512-
bv[k] = abs(x[k] - _mean) > _std
1513-
end
1514-
elseif (method == :threshold)
1515-
q_l, q_u = quantile(x, _thresh)
1516-
@inbounds for k in eachindex(x)
1517-
bv[k] = (x[k] < q_l) || (x[k] > q_u)
1518-
end
1519-
else # method == :quartiles
1520-
q25, q75 = quantile(x, [0.25, 0.75])
1521-
crit = (q75 - q25) * 1.5
1522-
q_minus, q_plus = q25 - crit, q75 + crit
1523-
@inbounds for k in eachindex(x)
1524-
bv[k] = (x[k] < q_minus) || (x[k] > q_plus)
1525-
end
1526-
end
1527-
return bv
1528-
end
1529-
1530-
function isoutlier(mat::AbstractMatrix{<:Real}; critic=3.0, method::Symbol=:median, threshold=nothing, width=0.0)
1531-
width > 0 && return isoutlier(mat2ds(mat), critic=critic, method=method, threshold=threshold, width=width)
1532-
bm = BitMatrix(undef, size(mat))
1533-
for k = 1:size(mat,2)
1534-
bm[:,k] .= isoutlier(view(mat, :, k), critic=critic, method=method, threshold=threshold)
1535-
end
1536-
return bm
1537-
end
1538-
1539-
function isoutlier(D::GMTdataset{<:Real,2}; critic=3.0, method::Symbol=:median, threshold=nothing, width=0.0)
1540-
(threshold === nothing && width <= 0) && error("Must provide the window length of via the `width` option or use the `threshold` option.")
1541-
if (width > 0)
1542-
method in [:median, :mean] || throw(ArgumentError("Unknown method: $method. Here use only one of :median or :mean"))
1543-
(method == :mean) && (method = boxcar) # :boxcar is the name in GMT for 'mean'
1544-
Dres = filter1d(D, filter=(type=method, width=width, highpass=true), E=true)
1545-
isoutlier(view(Dres.data, :, 2), critic=critic, method=method, threshold=threshold)
1546-
else
1547-
isoutlier(view(D.data, :, 2), critic=critic, method=method, threshold=threshold)
1548-
end
1549-
end

0 commit comments

Comments
 (0)