|
1 | 1 | //! # Support Vector Classifier.
|
2 | 2 | //!
|
3 | | -//! Example
| 3 | +//! Support Vector Classifier (SVC) is a binary classifier that uses an optimal hyperplane to separate the points in the input variable space by their class.
| 4 | +//!
| 5 | +//! During training, SVC chooses a Maximal-Margin hyperplane that separates the training instances with the largest margin.
| 6 | +//! The margin is the perpendicular distance from the boundary to the closest points only. Hence, only these points are relevant in defining
| 7 | +//! the hyperplane and in the construction of the classifier. These points are called the support vectors.
| 8 | +//!
| 9 | +//! While SVC selects a hyperplane with the largest margin, it allows some points in the training data to violate the separating boundary.
| 10 | +//! The parameter `C` > 0 controls how SVC handles these violations. The bigger the value of this parameter, the more the algorithm is penalized
| 11 | +//! for incorrectly classified points. In other words, setting this parameter to a small value results in a classifier that tolerates a large number
| 12 | +//! of misclassified samples. Mathematically, the SVC optimization problem can be defined as:
| 13 | +//!
| 14 | +//! \\[\underset{\vec{w}, b, \zeta}{\text{minimize}} \space \space \frac{1}{2} \lVert \vec{w} \rVert^2 + C\sum_{i=1}^m \zeta_i \\]
| 15 | +//!
| 16 | +//! subject to:
| 17 | +//!
| 18 | +//! \\[y_i(\langle\vec{w}, \vec{x}_i \rangle + b) \geq 1 - \zeta_i \\]
| 19 | +//! \\[\zeta_i \geq 0 \space \text{for any} \space i = 1, \ldots, m\\]
| 20 | +//!
| 21 | +//! Where \\( m \\) is the number of training samples, \\( y_i \\) is a label value (either 1 or -1) and \\(\langle\vec{w}, \vec{x}_i \rangle + b\\) is the decision boundary.
| 22 | +//!
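A quick note on why this objective maximizes the margin (standard SVM reasoning, left implicit above): for the closest correctly classified points the constraint holds with equality, \\( y_i(\langle\vec{w}, \vec{x}_i \rangle + b) = 1 \\), so their perpendicular distance to the boundary is \\( 1 / \lVert \vec{w} \rVert \\) and the full margin is

\\[ \frac{2}{\lVert \vec{w} \rVert} \\]

Shrinking \\( \lVert \vec{w} \rVert \\) therefore widens the margin, while the \\( C\sum_{i=1}^m \zeta_i \\) term trades margin width against boundary violations.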
| 23 | +//! To solve this optimization problem, SmartCore uses an [approximate SVM solver](https://leon.bottou.org/projects/lasvm).
| 24 | +//! The optimizer reaches accuracies similar to those of a real SVM after performing two passes through the training examples. You can choose the number of passes
| 25 | +//! through the data that the algorithm takes by changing the `epoch` parameter of the classifier.
| 26 | +//!
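Before the full doc-test example that follows, here is a minimal sketch of how the two knobs discussed above, `C` and `epoch`, might be set together. It assumes a builder-style `SVCParameters` with `with_c` and `with_epoch` methods and a default linear kernel; treat the exact names and signatures as assumptions to check against the smartcore version you use.

```rust
use smartcore::linalg::naive::dense_matrix::*;
use smartcore::svm::svc::{SVC, SVCParameters};

// Toy, linearly separable data; SVC expects labels of -1 or 1.
let x = DenseMatrix::from_2d_array(&[
    &[0., 0.],
    &[0., 1.],
    &[1., 0.],
    &[5., 5.],
    &[5., 6.],
    &[6., 5.],
]);
let y = vec![-1., -1., -1., 1., 1., 1.];

// A larger `C` penalizes margin violations harder; `epoch` sets how many
// passes the approximate solver makes over the training data.
let svc = SVC::fit(
    &x,
    &y,
    SVCParameters::default().with_c(10.0).with_epoch(2),
)
.unwrap();
let y_hat = svc.predict(&x).unwrap();
```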
| 27 | +//! Example:
4 | 28 | //!
|
5 | 29 | //! ```
|
6 | 30 | //! use smartcore::linalg::naive::dense_matrix::*;
|
|
47 | 71 | //!
|
48 | 72 | //! ## References:
|
49 | 73 | //!
|
50 | | -//! * ["Support Vector Machines" Kowalczyk A., 2017](https://www.svm-tutorial.com/2017/10/support-vector-machines-succinctly-released/)
| 74 | +//! * ["Support Vector Machines", Kowalczyk A., 2017](https://www.svm-tutorial.com/2017/10/support-vector-machines-succinctly-released/)
51 | 75 | //! * ["Fast Kernel Classifiers with Online and Active Learning", Bordes A., Ertekin S., Weston J., Bottou L., 2005](https://www.jmlr.org/papers/volume6/bordes05a/bordes05a.pdf)
|
| 76 | +//!
| 77 | +//! <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
| 78 | +//! <script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
52 | 79 |
|
53 | 80 | use std::collections::{HashMap, HashSet};
|
54 | 81 | use std::fmt::Debug;
|
@@ -220,7 +247,7 @@ impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> SVC<T, M, K> {
|
220 | 247 |
|
221 | 248 | impl<T: RealNumber, M: Matrix<T>, K: Kernel<T, M::RowVector>> PartialEq for SVC<T, M, K> {
|
222 | 249 | fn eq(&self, other: &Self) -> bool {
|
223 | | - if self.b != other.b
| 250 | + if (self.b - other.b).abs() > T::epsilon() * T::two()
224 | 251 | || self.w.len() != other.w.len()
|
225 | 252 | || self.instances.len() != other.instances.len()
|
226 | 253 | {
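The hunk above replaces an exact `!=` comparison on the float field `b` with a tolerance check scaled by machine epsilon: two models that are effectively identical can differ in `b` by rounding error, so exact comparison is brittle. A self-contained illustration of the pattern (the `approx_eq` helper here is hypothetical, for illustration only, not smartcore API):

```rust
// Compare floats within a small tolerance instead of exactly.
fn approx_eq(a: f64, b: f64, tol: f64) -> bool {
    (a - b).abs() <= tol
}

fn main() {
    let x = 0.1_f64 + 0.2;
    assert!(x != 0.3); // exact comparison fails: x is 0.30000000000000004
    assert!(approx_eq(x, 0.3, 2.0 * f64::EPSILON)); // tolerance check passes
}
```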
|
|