From 3fe916988fcb814372affbcabe0ef100ef55007b Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Fri, 26 Aug 2022 16:14:31 +0100 Subject: [PATCH 1/2] Init issue 11 implementation --- src/cluster/hierarchical.rs | 62 +++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 src/cluster/hierarchical.rs diff --git a/src/cluster/hierarchical.rs b/src/cluster/hierarchical.rs new file mode 100644 index 00000000..154cbfa5 --- /dev/null +++ b/src/cluster/hierarchical.rs @@ -0,0 +1,62 @@ +/// # Hierarchical clustering +/// +/// Implement hierarchical clustering methods: +/// * Agglomerative clustering (current) +/// * Bisecting K-Means (future) +/// * Fastcluster (future) +/// + +/* +class AgglomerativeClustering(): + """ + Parameters + ---------- + n_clusters : int or None, default=2 + The number of clusters to find. It must be ``None`` if + ``distance_threshold`` is not ``None``. + affinity : str or callable, default='euclidean' + If linkage is "ward", only "euclidean" is accepted. + linkage : {'ward',}, default='ward' + Which linkage criterion to use. The linkage criterion determines which + distance to use between sets of observation. The algorithm will merge + the pairs of cluster that minimize this criterion. + - 'ward' minimizes the variance of the clusters being merged. + compute_distances : bool, default=False + Computes distances between clusters even if `distance_threshold` is not + used. This can be used to make dendrogram visualization, but introduces + a computational and memory overhead. + """ + + def fit(X): + # compute tree + parents, childern = ward_tree(X, ....) 
+ # compute clusters + labels = _hierarchical.hc_get_heads(parents) + # assign cluster numbers + self.labels_ = np.searchsorted(np.unique(labels), labels) + +*/ + +// implement ward tree + + +// implement hierarchical cut (only needed if we want to allow compute_full_tree) (future) + + +// HOT: try to implement fastcluster (future) + + +// additional: implement BisectingKMeans (future) + + +mod tests { + // >>> from sklearn.cluster import AgglomerativeClustering + // >>> import numpy as np + // >>> X = np.array([[1, 2], [1, 4], [1, 0], + // ... [4, 2], [4, 4], [4, 0]]) + // >>> clustering = AgglomerativeClustering().fit(X) + // >>> clustering + // AgglomerativeClustering() + // >>> clustering.labels_ + // array([1, 1, 1, 0, 0, 0]) +} \ No newline at end of file From 20ca5c9647fe564013ccaccc0ac79002099a50aa Mon Sep 17 00:00:00 2001 From: "Lorenzo (Mec-iS)" Date: Tue, 30 Aug 2022 11:50:00 +0100 Subject: [PATCH 2/2] Add detailed tasks to docstring --- src/cluster/hierarchical.rs | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/cluster/hierarchical.rs b/src/cluster/hierarchical.rs index 154cbfa5..4e1477f9 100644 --- a/src/cluster/hierarchical.rs +++ b/src/cluster/hierarchical.rs @@ -28,9 +28,11 @@ class AgglomerativeClustering(): """ def fit(X): - # compute tree + # compute tree + # parents, children = ward_tree(X, ....) # compute clusters + # labels = _hierarchical.hc_get_heads(parents) # assign cluster numbers self.labels_ = np.searchsorted(np.unique(labels), labels) @@ -38,15 +40,14 @@ class AgglomerativeClustering(): */ // implement ward tree - - -// implement hierarchical cut (only needed if we want to allow compute_full_tree) (future) - - -// HOT: try to implement fastcluster (future) - - -// additional: implement BisectingKMeans (future) +// use scipy.cluster.hierarchy.ward +// +// use linkage +// +// use nn_chain +// + +// implement hc_get_heads mod tests {