|
module NearestNeighborModels

# ===================================================================
# IMPORTS
import MLJModelInterface
import MLJModelInterface: @mlj_model, metadata_model, metadata_pkg,
    Table, Continuous, Count, Finite, OrderedFactor, Multiclass
import NearestNeighbors
import StatsBase
import Tables

using Distances
using FillArrays
using LinearAlgebra
using Statistics

# ===================================================================
## EXPORTS
export list_kernels, ColumnTable, DictTable

# Export KNN models
# KNN models are exported automatically by `@mlj_model`

# Re-Export Distance Metrics from `Distances.jl`
export Euclidean, Cityblock, Minkowski, Chebyshev, Hamming, WeightedEuclidean,
    WeightedCityblock, WeightedMinkowski

# Export KNN Kernels
export DualU, DualD, Dudani, Fibonacci, Inverse, ISquared, KNNKernel, Macleod, Rank,
    ReciprocalRank, UDK, Uniform, UserDefinedKernel, Zavreal

# ===================================================================
## CONSTANTS
# Short aliases for abstract array types used throughout the package.
const Vec{T} = AbstractVector{T}
const Mat{T} = AbstractMatrix{T}
const Arr{T, N} = AbstractArray{T, N}
const ColumnTable = Tables.ColumnTable
const DictTable = Dict{Symbol, <:AbstractVector}
const MultiUnivariateFinite = Union{DictTable, ColumnTable}

# Define constants for easy referencing of packages
const MMI = MLJModelInterface
const NN = NearestNeighbors
# Must agree with the module name above and the `name` passed to `metadata_pkg`
# below (was misspelled "NearestNeighborsModels").
const PKG = "NearestNeighborModels"

# Definitions of model descriptions for use in model doc-strings.
const KNNRegressorDescription = """
    K-Nearest Neighbors regressor: predicts the response associated with a new point
    by taking a weighted average of the response of the K-nearest points.
    """

const KNNClassifierDescription = """
    K-Nearest Neighbors classifier: predicts the class associated with a new point
    by taking a vote over the classes of the K-nearest points.
    """

const KNNCoreFields = """
    * `K::Int=5` : number of neighbors
    * `algorithm::Symbol = :kdtree` : one of `(:kdtree, :brutetree, :balltree)`
    * `metric::Metric = Euclidean()` : a `Metric` object for the distance between points
    * `leafsize::Int = 10` : at what number of points to stop splitting the tree
    * `reorder::Bool = true` : if true puts points close in distance close in memory
    * `weights::KNNKernel=Uniform()` : kernel used in assigning weights to the
      k-nearest neighbors for each observation. An instance of one of the types in
      `list_kernels()`. User-defined weighting functions can be passed by wrapping the
      function in a `UserDefinedKernel` kernel. If sample weights `w` are passed during
      machine construction e.g `machine(model, X, y, w)` then the weight assigned to each
      neighbor is the product of the `KNNKernel` generated weight and the corresponding
      neighbor sample weight.

    """

const SeeAlso = """
    See also the
    [package documentation](https://github.com/KristofferC/NearestNeighbors.jl).
    For more information about the kernels see the paper by Geler et.al
    [Comparison of different weighting schemes for the kNN classifier
    on time-series data]
    (https://perun.pmf.uns.ac.rs/radovanovic/publications/2016-kais-knn-weighting.pdf).
    """

const MultitargetKNNClassifierFields = """
    ## Keywords Parameters

    $KNNCoreFields
    * `output_type::Type{<:MultiUnivariateFinite}=DictTable` : One of
      (`ColumnTable`, `DictTable`). The type of table type to use for predictions.
      Setting to `ColumnTable` might improve performance for narrow tables while setting to
      `DictTable` improves performance for wide tables.

    $SeeAlso

    """

const KNNFields = """
    ## Keywords Parameters

    $KNNCoreFields

    $SeeAlso

    """

# ===================================================================
# Includes
include("utils.jl")
include("kernels.jl")
include("models.jl")

# ===================================================================
# List of all models interfaced
const MODELS = (
    KNNClassifier, KNNRegressor, MultitargetKNNRegressor, MultitargetKNNClassifier
)

# ====================================================================
# PKG_METADATA
# Broadcast package-level metadata onto every interfaced model type.
metadata_pkg.(
    MODELS,
    name = "NearestNeighborModels",
    uuid = "6f286f6a-111f-5878-ab1e-185364afe411",
    url = "https://github.com/alan-turing-institute/NearestNeighborModels.jl",
    license = "MIT",
    julia = true,
    is_wrapper = false
)

end # module
0 commit comments