Skip to content

Commit 79e6d2d

Browse files
committed
refactor examples, rename import in performance_eval
1 parent 9cd8ce7 commit 79e6d2d

File tree

6 files changed

+308
-98
lines changed

6 files changed

+308
-98
lines changed

examples/feature_extraction/ellipses.ipynb

Lines changed: 84 additions & 30 deletions
Large diffs are not rendered by default.

examples/feature_extraction/ellipses.py

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# %% [markdown]
2+
# # Detection of rice grains using ellipses
3+
14
# %%
25
import matplotlib.pyplot as plt
36
import numpy as np
@@ -8,6 +11,11 @@
811
from pybalu.io import imread
912
from skimage.measure import label
1013

14+
# %% [markdown]
15+
# ## Matplotlib setup
16+
# The following code is used to set up the default parameters for all the
17+
# plots shown by matplotlib
18+
1119
# %%
1220
import matplotlib
1321
matplotlib.rcParams["figure.figsize"] = (7, 7)
@@ -16,12 +24,24 @@
1624
matplotlib.rcParams["figure.figsize"] = (7, 7)
1725
del matplotlib
1826

27+
# %% [markdown]
28+
# ## Loading and displaying the image
29+
1930
# %%
2031
im = imread("feature_extraction/rice.png")
2132
plt.title("Original Image", fontdict={"fontsize": 20}, pad=20)
2233
plt.imshow(im, cmap="gray")
2334
plt.show()
2435

36+
# %% [markdown]
37+
# ## Recognizing rice grains
38+
# In order to recognize the grains, the following steps are followed:
39+
#
40+
# 1. Image is transformed to binary
41+
# 2. Rice grains are separated and labeled accordingly
42+
# 3. Geometric features are calculated for each grain by using the `basic_geo_features` function
43+
# 4. An `Ellipse` object is built for each grain
44+
2545
# %%
2646
im_bin = (im > 140).astype(int)
2747
labeled, n = label(im_bin, return_num=True)
@@ -32,17 +52,21 @@ def calc_ellipse(idx):
3252
region = (labeled_T == idx).astype(int)
3353
box = _bbox(region)
3454
feats = basic_geo_features(region[box])
35-
# feats[0]: center of grav i [px]
36-
# feats[1]: center of grav j [px]
37-
# feats[10]: MajorAxisLength [px]
38-
# feats[11]: MinorAxisLength [px]
39-
# feats[12]: Orientation [deg]
40-
return np.array([box[0].start + feats[0], box[1].start + feats[1], feats[10], feats[11], feats[12]])
55+
return np.array([
56+
box[0].start + feats[0], # feats[0]: center of grav i [px]
57+
box[1].start + feats[1], # feats[1]: center of grav j [px]
58+
feats[10], # feats[10]: MajorAxisLength [px]
59+
feats[11], # feats[11]: MinorAxisLength [px]
60+
feats[12] # feats[12]: Orientation [deg]
61+
])
4162

4263

4364
with Pool() as pool:
4465
ellipses = np.vstack(pool.map(calc_ellipse, range(1, n)))
4566

67+
# %% [markdown]
68+
# ## Displaying the ellipses over the original image
69+
4670
# %%
4771
ax = plt.axes()
4872
plt.title("Segmented Image")
@@ -62,6 +86,12 @@ def draw_ellipse(x, y, height, width, angle, axes):
6286

6387
plt.show()
6488

89+
# %% [markdown]
90+
# ## Finding mean sized (major axis) rice grains
91+
# Rice grains whose major axis is between the 25th and 75th percentile are highlighted.
92+
# This is done with the help of numpy matrix operations
93+
94+
6595
# %%
6696
plt.title("Mean sized rice grains (major axis)")
6797

@@ -75,6 +105,12 @@ def draw_ellipse(x, y, height, width, angle, axes):
75105
plt.imshow(im_mean, cmap="gray")
76106
plt.show()
77107

108+
109+
# %% [markdown]
110+
# ## Finding rice grains oriented at a specific angle
111+
# Rice grains whose rotation is within 10deg of the given angle are highlighted. Just as
112+
# before, this is done with the help of numpy matrix operations
113+
78114
# %%
79115

80116

examples/feature_selection/sfs.ipynb

Lines changed: 121 additions & 45 deletions
Large diffs are not rendered by default.

examples/feature_selection/sfs.py

Lines changed: 57 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,84 @@
1+
# %% [markdown]
2+
# # SFS feature selection
3+
14
# %%
5+
import matplotlib.pyplot as plt
6+
from sklearn.neighbors import KNeighborsClassifier
7+
from pybalu.performance_eval import performance
8+
from pybalu.classification import structure
9+
from pybalu.feature_selection import sfs
10+
from pybalu.feature_transformation import normalize
11+
from pybalu.data_selection import stratify
212
from scipy.io import loadmat
313

4-
# load a dataset with 810 samples and 294 features
14+
# %% [markdown]
15+
# ## Matplotlib setup
16+
# The following code is used to set up the default parameters for all the
17+
# plots shown by matplotlib
18+
19+
# %%
20+
import matplotlib
21+
matplotlib.rcParams["figure.figsize"] = (7, 7)
22+
matplotlib.rcParams["axes.titlesize"] = 20
23+
matplotlib.rcParams["axes.titlepad"] = 15
24+
matplotlib.rcParams["figure.figsize"] = (7, 7)
25+
del matplotlib
26+
27+
# %% [markdown]
28+
# ## Loading classification data
29+
# A dataset with features already extracted is loaded.
30+
#
31+
# This dataset consists of 810 samples and 294 features.
32+
# Two classes are defined (0 and 1) with 405 samples
33+
# each.
34+
35+
# %%
536
data = loadmat("feature_selection/realdata")
637
features = data["features"]
738
classes = data["classes"].squeeze()
839

9-
# %%
10-
from pybalu.data_selection import stratify
11-
from pybalu.feature_transformation import normalize
1240

13-
# Training and Testing data (90% training, 10% testing)
41+
# %% [markdown]
42+
# ## Dataset separation for training and testing
43+
# The dataset is separated into two different categories:
44+
# 90% for training and 10% for testing.
45+
46+
# %%
1447
idx_train, idx_test = stratify(classes, .90)
1548
f_train = features[idx_train]
1649
c_train = classes[idx_train]
1750
f_test = features[idx_test]
1851
c_test = classes[idx_test]
1952

53+
# %% [markdown]
54+
# ## Feature normalization
55+
# Features are normalized for better (?) sfs performance
56+
57+
# %%
2058
f_train_norm, a, b = normalize(f_train)
2159
f_test_norm = f_test * a + b
2260

23-
# %%
24-
from pybalu.feature_selection import sfs
2561

62+
# %% [markdown]
63+
# ## Feature selection
64+
65+
# %%
2666
N_FEATURES = 15
2767

2868
selected_feats = sfs(f_train_norm, c_train, n_features=N_FEATURES,
2969
method="fisher", show=True)
3070

3171

32-
# %%
33-
from pybalu.classification import structure
34-
from pybalu.performance_eval import performance
35-
from sklearn.neighbors import KNeighborsClassifier
36-
import matplotlib.pyplot as plt
3772

73+
# %% [markdown]
74+
# ## Performance evaluation
75+
# Feature selection is evaluated iteratively for each subset of
76+
# 1, 2, ..., 15 best features
77+
#
78+
# Data is classified using a K Nearest Neighbors classifier
79+
# using 3 neighbors
80+
81+
# %%
3882
def performance_for_features(feat_idxs):
3983
# train classifier
4084
knn = KNeighborsClassifier(n_neighbors=3)
@@ -48,7 +92,7 @@ def performance_for_features(feat_idxs):
4892
values = [performance_for_features(selected_feats[:i]) * 100
4993
for i in range(1, N_FEATURES + 1)]
5094

51-
plt.plot(values)
95+
plt.bar(*zip(*enumerate(values)), tick_label=range(1, N_FEATURES+1))
5296
plt.title("Performance vs. number of features")
5397
plt.xlabel('selected features')
5498
plt.ylabel('accuracy [%]')

pybalu/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@
1111
"misc",
1212
"performance_eval"]
1313

14-
__version__ = '0.2.2'
14+
__version__ = '0.2.3'

pybalu/performance_eval/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
from ._confusion import confusion
2-
from ._crossval import crossval
3-
from ._performance import performance
1+
from .confusion import confusion
2+
from .crossval import crossval
3+
from .performance import performance
44

55
__all__ = ["confusion", "crossval", "performance"]

0 commit comments

Comments
 (0)