refactor examples, rename import in performance_eval

mbucchi · mbucchi · commit 79e6d2d1a7c0 · 2019-04-02T11:32:34.000-03:00
diff --git a/examples/feature_extraction/ellipses.ipynb b/examples/feature_extraction/ellipses.ipynb
diff --git a/examples/feature_extraction/ellipses.py b/examples/feature_extraction/ellipses.py
@@ -1,3 +1,6 @@
+# %% [markdown]
+# # Detection of rice grains using ellipses
+
 # %%
 import matplotlib.pyplot as plt
 import numpy as np
@@ -8,6 +11,11 @@
 from pybalu.io import imread
 from skimage.measure import label
 
+# %% [markdown]
+# ## Matplotlib setup
+# The following code is used to set up the default parameters for all the 
+# plots shown by matplotlib
+
 # %%
 import matplotlib
 matplotlib.rcParams["figure.figsize"] = (7, 7)
@@ -16,12 +24,24 @@
 matplotlib.rcParams["figure.figsize"] = (7, 7)
 del matplotlib
 
+# %% [markdown]
+# ## Loading and displaying the image
+
 # %%
 im = imread("feature_extraction/rice.png")
 plt.title("Original Image", fontdict={"fontsize": 20}, pad=20)
 plt.imshow(im, cmap="gray")
 plt.show()
 
+# %% [markdown]
+# ## Recognizing rice grains
+# In order to recognize the grains, the following steps are followed:
+#
+#   1. Image is transformed to binary 
+#   2. Rice grains are separated and labeled accordingly
+#   3. Geometric features are calculated for each grain using the `basic_geo_features` function
+#   4. An `Ellipse` object is built for each grain
+
 # %%
 im_bin = (im > 140).astype(int)
 labeled, n = label(im_bin, return_num=True)
@@ -32,17 +52,21 @@ def calc_ellipse(idx):
     region = (labeled_T == idx).astype(int)
     box = _bbox(region)
     feats = basic_geo_features(region[box])
-    # feats[0]:  center of grav i [px]
-    # feats[1]:  center of grav j [px]
-    # feats[10]: MajorAxisLength  [px]
-    # feats[11]: MinorAxisLength  [px]
-    # feats[12]: Orientation      [deg]
-    return np.array([box[0].start + feats[0], box[1].start + feats[1], feats[10], feats[11], feats[12]])
+    return np.array([
+        box[0].start + feats[0],    # feats[0]:  center of grav i [px]
+        box[1].start + feats[1],    # feats[1]:  center of grav j [px]
+        feats[10],                  # feats[10]: MajorAxisLength  [px]
+        feats[11],                  # feats[11]: MinorAxisLength  [px]
+        feats[12]                   # feats[12]: Orientation      [deg]
+    ])
 
 
 with Pool() as pool:
     ellipses = np.vstack(pool.map(calc_ellipse, range(1, n)))
 
+# %% [markdown]
+# ## Displaying the ellipses over the original image
+
 # %%
 ax = plt.axes()
 plt.title("Segmented Image")
@@ -62,6 +86,12 @@ def draw_ellipse(x, y, height, width, angle, axes):
 
 plt.show()
 
+# %% [markdown]
+# ## Finding mean sized (major axis) rice grains
+# Rice grains whose major axis is between the 25th and 75th percentile are highlighted.
+# This is done with the help of numpy matrix operations
+
+
 # %%
 plt.title("Mean sized rice grains (major axis)")
 
@@ -75,6 +105,12 @@ def draw_ellipse(x, y, height, width, angle, axes):
 plt.imshow(im_mean, cmap="gray")
 plt.show()
 
+
+# %% [markdown]
+# ## Finding rice grains oriented at a specific angle
+# Rice grains whose rotation is within 10deg of the given angle are highlighted. Just as
+# before, this is done with the help of numpy matrix operations
+
 # %%
 
 
diff --git a/examples/feature_selection/sfs.ipynb b/examples/feature_selection/sfs.ipynb
diff --git a/examples/feature_selection/sfs.py b/examples/feature_selection/sfs.py
@@ -1,40 +1,84 @@
+# %% [markdown]
+# # SFS feature selection
+
 # %%
+import matplotlib.pyplot as plt
+from sklearn.neighbors import KNeighborsClassifier
+from pybalu.performance_eval import performance
+from pybalu.classification import structure
+from pybalu.feature_selection import sfs
+from pybalu.feature_transformation import normalize
+from pybalu.data_selection import stratify
 from scipy.io import loadmat
 
-# load a dataset with 810 samples and 294 features
+# %% [markdown]
+# ## Matplotlib setup
+# The following code is used to set up the default parameters for all the 
+# plots shown by matplotlib
+
+# %%
+import matplotlib
+matplotlib.rcParams["figure.figsize"] = (7, 7)
+matplotlib.rcParams["axes.titlesize"] = 20
+matplotlib.rcParams["axes.titlepad"] = 15
+matplotlib.rcParams["figure.figsize"] = (7, 7)
+del matplotlib
+
+# %% [markdown]
+# ## Loading classification data
+# A dataset with features already extracted is loaded.
+#
+# This dataset consists of 810 samples and 294 features.
+# Two classes are defined (0 and 1), with 405 samples
+# each.
+
+# %%
 data = loadmat("feature_selection/realdata")
 features = data["features"]
 classes = data["classes"].squeeze()
 
-# %%
-from pybalu.data_selection import stratify
-from pybalu.feature_transformation import normalize
 
-# Training and Testing data (90% training, 10% testing)
+# %% [markdown]
+# ## Dataset separation for training and testing
+# The dataset is separated into two different subsets:
+# 90% for training and 10% for testing.
+
+# %%
 idx_train, idx_test = stratify(classes, .90)
 f_train = features[idx_train]
 c_train = classes[idx_train]
 f_test = features[idx_test]
 c_test = classes[idx_test]
 
+# %% [markdown]
+# ## Feature normalization
+# Features are normalized for better (?) sfs performance
+
+# %%
 f_train_norm, a, b = normalize(f_train)
 f_test_norm = f_test * a + b
 
-# %%
-from pybalu.feature_selection import sfs
 
+# %% [markdown]
+# ## Feature selection
+
+# %%
 N_FEATURES = 15
 
 selected_feats = sfs(f_train_norm, c_train, n_features=N_FEATURES,
                      method="fisher", show=True)
 
 
-# %%
-from pybalu.classification import structure
-from pybalu.performance_eval import performance
-from sklearn.neighbors import KNeighborsClassifier
-import matplotlib.pyplot as plt
 
+# %% [markdown]
+# ## Performance evaluation
+# Feature selection is evaluated iteratively for each subset of
+# 1, 2, ..., 15 best features
+#
+# Data is classified using a K Nearest Neighbors classifier
+# with 3 neighbors
+
+# %%
 def performance_for_features(feat_idxs):
     # train classifier
     knn = KNeighborsClassifier(n_neighbors=3)
@@ -48,7 +92,7 @@ def performance_for_features(feat_idxs):
 values = [performance_for_features(selected_feats[:i]) * 100
           for i in range(1, N_FEATURES + 1)]
 
-plt.plot(values)
+plt.bar(*zip(*enumerate(values)), tick_label=range(1, N_FEATURES+1))
 plt.title("Performance vs. number of features")
 plt.xlabel('selected features')
 plt.ylabel('accuracy [%]')
diff --git a/pybalu/__init__.py b/pybalu/__init__.py
@@ -11,4 +11,4 @@
            "misc",
            "performance_eval"]
 
-__version__ = '0.2.2'
+__version__ = '0.2.3'
diff --git a/pybalu/performance_eval/__init__.py b/pybalu/performance_eval/__init__.py
@@ -1,5 +1,5 @@
-from ._confusion import confusion
-from ._crossval import crossval
-from ._performance import performance
+from .confusion import confusion
+from .crossval import crossval
+from .performance import performance
 
 __all__ = ["confusion", "crossval", "performance"]