
Commit a1ab6aa

Add tests
1 parent ab97247 commit a1ab6aa

File tree: 7 files changed, +470 additions, 0 deletions


pyproject.toml

Lines changed: 9 additions & 0 deletions
@@ -51,3 +51,12 @@ dependencies = [
 
 [project.optional-dependencies]
 deep_learning = ["keras", "torch"]
+
+# PyTest Configuration. Later, PyTest will support the [tool.pytest] table.
+[tool.pytest.ini_options]
+minversion = "6.0"
+addopts = "-ra -q"
+testpaths = [
+    "tests",
+    "integration",
+]
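With these options, pytest 6.0+ runs quietly with a summary of non-passing tests (-ra -q) and collects tests from the tests and integration directories. A minimal sketch of a test module that this configuration would pick up (the file name tests/test_smoke.py and its contents are hypothetical, not part of this commit):

# tests/test_smoke.py (hypothetical): a minimal test collected under the configured testpaths.
import pygad

def fitness_func(ga_instance, solution, solution_idx):
    # Toy fitness: prefer solutions whose genes sum close to 10.
    return 1.0 / (abs(sum(solution) - 10) + 1e-8)

def test_ga_runs():
    ga = pygad.GA(num_generations=5,
                  sol_per_pop=10,
                  num_parents_mating=4,
                  num_genes=3,
                  fitness_func=fitness_func,
                  suppress_warnings=True)
    ga.run()
    _, best_fitness, _ = ga.best_solution()
    assert best_fitness > 0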

tests/example_clustering_2.py

Lines changed: 122 additions & 0 deletions
@@ -0,0 +1,122 @@
import numpy
import matplotlib.pyplot
import pygad

cluster1_num_samples = 10
cluster1_x1_start = 0
cluster1_x1_end = 5
cluster1_x2_start = 2
cluster1_x2_end = 6
cluster1_x1 = numpy.random.random(size=(cluster1_num_samples))
cluster1_x1 = cluster1_x1 * (cluster1_x1_end - cluster1_x1_start) + cluster1_x1_start
cluster1_x2 = numpy.random.random(size=(cluster1_num_samples))
cluster1_x2 = cluster1_x2 * (cluster1_x2_end - cluster1_x2_start) + cluster1_x2_start

cluster2_num_samples = 10
cluster2_x1_start = 10
cluster2_x1_end = 15
cluster2_x2_start = 8
cluster2_x2_end = 12
cluster2_x1 = numpy.random.random(size=(cluster2_num_samples))
cluster2_x1 = cluster2_x1 * (cluster2_x1_end - cluster2_x1_start) + cluster2_x1_start
cluster2_x2 = numpy.random.random(size=(cluster2_num_samples))
cluster2_x2 = cluster2_x2 * (cluster2_x2_end - cluster2_x2_start) + cluster2_x2_start

c1 = numpy.array([cluster1_x1, cluster1_x2]).T
c2 = numpy.array([cluster2_x1, cluster2_x2]).T

data = numpy.concatenate((c1, c2), axis=0)

matplotlib.pyplot.scatter(cluster1_x1, cluster1_x2)
matplotlib.pyplot.scatter(cluster2_x1, cluster2_x2)
matplotlib.pyplot.title("Optimal Clustering")
matplotlib.pyplot.show()

def euclidean_distance(X, Y):
    """
    Calculate the euclidean distance between X and Y. It accepts:
    :X should be a matrix of size (N, f) where N is the number of samples and f is the number of features for each sample.
    :Y should be of size f. In other words, it is a single sample.

    Returns a vector of N elements with the distances between the N samples and Y.
    """

    return numpy.sqrt(numpy.sum(numpy.power(X - Y, 2), axis=1))

def cluster_data(solution, solution_idx):
    """
    Clusters the data based on the current solution.
    """

    global num_clusters, data
    feature_vector_length = data.shape[1]
    cluster_centers = [] # A list of size (C, f) where C is the number of clusters and f is the number of features representing each sample.
    all_clusters_dists = [] # A list of size (C, N) where C is the number of clusters and N is the number of data samples. It holds the distances between each cluster center and all the data samples.
    clusters = [] # A list with C elements where each element holds the indices of the samples within a cluster.
    clusters_sum_dist = [] # A list with C elements where each element represents the sum of distances of the samples within a cluster.

    for clust_idx in range(num_clusters):
        # Extract the current cluster center from the solution.
        cluster_centers.append(solution[feature_vector_length*clust_idx:feature_vector_length*(clust_idx+1)])
        # Calculate the distance (e.g. euclidean) between the current cluster center and all samples.
        cluster_center_dists = euclidean_distance(data, cluster_centers[clust_idx])
        all_clusters_dists.append(numpy.array(cluster_center_dists))

    cluster_centers = numpy.array(cluster_centers)
    all_clusters_dists = numpy.array(all_clusters_dists)

    # A 1D array that, for each sample, holds the index of the cluster with the smallest distance.
    # In other words, the array holds the sample's cluster index.
    cluster_indices = numpy.argmin(all_clusters_dists, axis=0)
    for clust_idx in range(num_clusters):
        clusters.append(numpy.where(cluster_indices == clust_idx)[0])
        # Calculate the sum of distances for the cluster.
        if len(clusters[clust_idx]) == 0:
            # In case the cluster is empty (i.e. has zero samples).
            clusters_sum_dist.append(0)
        else:
            # When the cluster is not empty (i.e. has at least 1 sample).
            clusters_sum_dist.append(numpy.sum(all_clusters_dists[clust_idx, clusters[clust_idx]]))
            # clusters_sum_dist.append(numpy.sum(euclidean_distance(data[clusters[clust_idx], :], cluster_centers[clust_idx])))

    clusters_sum_dist = numpy.array(clusters_sum_dist)

    return cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist

def fitness_func(ga_instance, solution, solution_idx):
    _, _, _, _, clusters_sum_dist = cluster_data(solution, solution_idx)

    # The tiny value 0.00000001 is added to the denominator in case the sum of distances is 0.
    fitness = 1.0 / (numpy.sum(clusters_sum_dist) + 0.00000001)

    return fitness

num_clusters = 2
num_genes = num_clusters * data.shape[1]

ga_instance = pygad.GA(num_generations=100,
                       sol_per_pop=10,
                       num_parents_mating=5,
                       init_range_low=-6,
                       init_range_high=20,
                       keep_parents=2,
                       num_genes=num_genes,
                       fitness_func=fitness_func,
                       suppress_warnings=True)

ga_instance.run()

best_solution, best_solution_fitness, best_solution_idx = ga_instance.best_solution()
print("Best solution is {bs}".format(bs=best_solution))
print("Fitness of the best solution is {bsf}".format(bsf=best_solution_fitness))
print("Best solution found after {gen} generations".format(gen=ga_instance.best_solution_generation))

cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist = cluster_data(best_solution, best_solution_idx)

for cluster_idx in range(num_clusters):
    cluster_x = data[clusters[cluster_idx], 0]
    cluster_y = data[clusters[cluster_idx], 1]
    matplotlib.pyplot.scatter(cluster_x, cluster_y)
    matplotlib.pyplot.scatter(cluster_centers[cluster_idx, 0], cluster_centers[cluster_idx, 1], linewidths=5)
matplotlib.pyplot.title("Clustering using PyGAD")
matplotlib.pyplot.show()
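The GA solution here is a flat vector of num_genes = num_clusters * data.shape[1] values: the coordinates of all cluster centers laid end to end, which is what cluster_data() slices apart. A minimal decoding sketch, assuming the variables from the script above (not part of the commit):

# Hypothetical sketch: reshape the flat solution vector into a
# (num_clusters, num_features) matrix of cluster centers.
centers = numpy.array(best_solution).reshape(num_clusters, data.shape[1])
print("Cluster centers:\n", centers)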

tests/example_clustering_3.py

Lines changed: 134 additions & 0 deletions
@@ -0,0 +1,134 @@
import numpy
import matplotlib.pyplot
import pygad

cluster1_num_samples = 20
cluster1_x1_start = 0
cluster1_x1_end = 5
cluster1_x2_start = 2
cluster1_x2_end = 6
cluster1_x1 = numpy.random.random(size=(cluster1_num_samples))
cluster1_x1 = cluster1_x1 * (cluster1_x1_end - cluster1_x1_start) + cluster1_x1_start
cluster1_x2 = numpy.random.random(size=(cluster1_num_samples))
cluster1_x2 = cluster1_x2 * (cluster1_x2_end - cluster1_x2_start) + cluster1_x2_start

cluster2_num_samples = 20
cluster2_x1_start = 4
cluster2_x1_end = 12
cluster2_x2_start = 14
cluster2_x2_end = 18
cluster2_x1 = numpy.random.random(size=(cluster2_num_samples))
cluster2_x1 = cluster2_x1 * (cluster2_x1_end - cluster2_x1_start) + cluster2_x1_start
cluster2_x2 = numpy.random.random(size=(cluster2_num_samples))
cluster2_x2 = cluster2_x2 * (cluster2_x2_end - cluster2_x2_start) + cluster2_x2_start

cluster3_num_samples = 20
cluster3_x1_start = 12
cluster3_x1_end = 18
cluster3_x2_start = 8
cluster3_x2_end = 11
cluster3_x1 = numpy.random.random(size=(cluster3_num_samples))
cluster3_x1 = cluster3_x1 * (cluster3_x1_end - cluster3_x1_start) + cluster3_x1_start
cluster3_x2 = numpy.random.random(size=(cluster3_num_samples))
cluster3_x2 = cluster3_x2 * (cluster3_x2_end - cluster3_x2_start) + cluster3_x2_start

c1 = numpy.array([cluster1_x1, cluster1_x2]).T
c2 = numpy.array([cluster2_x1, cluster2_x2]).T
c3 = numpy.array([cluster3_x1, cluster3_x2]).T

data = numpy.concatenate((c1, c2, c3), axis=0)

matplotlib.pyplot.scatter(cluster1_x1, cluster1_x2)
matplotlib.pyplot.scatter(cluster2_x1, cluster2_x2)
matplotlib.pyplot.scatter(cluster3_x1, cluster3_x2)
matplotlib.pyplot.title("Optimal Clustering")
matplotlib.pyplot.show()

def euclidean_distance(X, Y):
    """
    Calculate the euclidean distance between X and Y. It accepts:
    :X should be a matrix of size (N, f) where N is the number of samples and f is the number of features for each sample.
    :Y should be of size f. In other words, it is a single sample.

    Returns a vector of N elements with the distances between the N samples and Y.
    """

    return numpy.sqrt(numpy.sum(numpy.power(X - Y, 2), axis=1))

def cluster_data(solution, solution_idx):
    """
    Clusters the data based on the current solution.
    """

    global num_clusters, feature_vector_length, data
    cluster_centers = [] # A list of size (C, f) where C is the number of clusters and f is the number of features representing each sample.
    all_clusters_dists = [] # A list of size (C, N) where C is the number of clusters and N is the number of data samples. It holds the distances between each cluster center and all the data samples.
    clusters = [] # A list with C elements where each element holds the indices of the samples within a cluster.
    clusters_sum_dist = [] # A list with C elements where each element represents the sum of distances of the samples within a cluster.

    for clust_idx in range(num_clusters):
        # Extract the current cluster center from the solution.
        cluster_centers.append(solution[feature_vector_length*clust_idx:feature_vector_length*(clust_idx+1)])
        # Calculate the distance (e.g. euclidean) between the current cluster center and all samples.
        cluster_center_dists = euclidean_distance(data, cluster_centers[clust_idx])
        all_clusters_dists.append(numpy.array(cluster_center_dists))

    cluster_centers = numpy.array(cluster_centers)
    all_clusters_dists = numpy.array(all_clusters_dists)

    # A 1D array that, for each sample, holds the index of the cluster with the smallest distance.
    # In other words, the array holds the sample's cluster index.
    cluster_indices = numpy.argmin(all_clusters_dists, axis=0)
    for clust_idx in range(num_clusters):
        clusters.append(numpy.where(cluster_indices == clust_idx)[0])
        # Calculate the sum of distances for the cluster.
        if len(clusters[clust_idx]) == 0:
            # In case the cluster is empty (i.e. has zero samples).
            clusters_sum_dist.append(0)
        else:
            # When the cluster is not empty (i.e. has at least 1 sample).
            clusters_sum_dist.append(numpy.sum(all_clusters_dists[clust_idx, clusters[clust_idx]]))
            # clusters_sum_dist.append(numpy.sum(euclidean_distance(data[clusters[clust_idx], :], cluster_centers[clust_idx])))

    clusters_sum_dist = numpy.array(clusters_sum_dist)

    return cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist

def fitness_func(ga_instance, solution, solution_idx):
    _, _, _, _, clusters_sum_dist = cluster_data(solution, solution_idx)

    # The tiny value 0.00000001 is added to the denominator in case the sum of distances is 0.
    fitness = 1.0 / (numpy.sum(clusters_sum_dist) + 0.00000001)

    return fitness

num_clusters = 3
feature_vector_length = data.shape[1]
num_genes = num_clusters * feature_vector_length

ga_instance = pygad.GA(num_generations=100,
                       sol_per_pop=10,
                       init_range_low=0,
                       init_range_high=20,
                       num_parents_mating=5,
                       keep_parents=2,
                       num_genes=num_genes,
                       fitness_func=fitness_func,
                       suppress_warnings=True)

ga_instance.run()

best_solution, best_solution_fitness, best_solution_idx = ga_instance.best_solution()
print("Best solution is {bs}".format(bs=best_solution))
print("Fitness of the best solution is {bsf}".format(bsf=best_solution_fitness))
print("Best solution found after {gen} generations".format(gen=ga_instance.best_solution_generation))

cluster_centers, all_clusters_dists, cluster_indices, clusters, clusters_sum_dist = cluster_data(best_solution, best_solution_idx)

for cluster_idx in range(num_clusters):
    cluster_x = data[clusters[cluster_idx], 0]
    cluster_y = data[clusters[cluster_idx], 1]
    matplotlib.pyplot.scatter(cluster_x, cluster_y)
    matplotlib.pyplot.scatter(cluster_centers[cluster_idx, 0], cluster_centers[cluster_idx, 1], linewidths=5)
matplotlib.pyplot.title("Clustering using PyGAD")
matplotlib.pyplot.show()

tests/example_custom_operators.py

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
import pygad
import numpy

"""
This script gives an example of using custom user-defined functions for the 3 operators:
1) Parent selection.
2) Crossover.
3) Mutation.
For more information, check the User-Defined Crossover, Mutation, and Parent Selection Operators section in the documentation:
https://pygad.readthedocs.io/en/latest/README_pygad_ReadTheDocs.html#user-defined-crossover-mutation-and-parent-selection-operators
"""

equation_inputs = [4, -2, 3.5]
desired_output = 44

def fitness_func(ga_instance, solution, solution_idx):
    output = numpy.sum(solution * equation_inputs)

    fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001)

    return fitness

def parent_selection_func(fitness, num_parents, ga_instance):
    # Selects the best {num_parents} parents. Works as steady-state selection.

    fitness_sorted = sorted(range(len(fitness)), key=lambda k: fitness[k])
    fitness_sorted.reverse()

    parents = numpy.empty((num_parents, ga_instance.population.shape[1]))

    for parent_num in range(num_parents):
        parents[parent_num, :] = ga_instance.population[fitness_sorted[parent_num], :].copy()

    return parents, numpy.array(fitness_sorted[:num_parents])

def crossover_func(parents, offspring_size, ga_instance):
    # This is single-point crossover.
    offspring = []
    idx = 0
    while len(offspring) != offspring_size[0]:
        parent1 = parents[idx % parents.shape[0], :].copy()
        parent2 = parents[(idx + 1) % parents.shape[0], :].copy()

        # Pick a random gene index as the split point (offspring_size[1] is the number of genes).
        random_split_point = numpy.random.choice(range(offspring_size[1]))

        parent1[random_split_point:] = parent2[random_split_point:]

        offspring.append(parent1)

        idx += 1

    return numpy.array(offspring)

def mutation_func(offspring, ga_instance):
    # This is random mutation that mutates a single gene per chromosome.
    for chromosome_idx in range(offspring.shape[0]):
        # Pick a single random gene and add a random value to it.
        random_gene_idx = numpy.random.choice(range(offspring.shape[1]))

        offspring[chromosome_idx, random_gene_idx] += numpy.random.random()

    return offspring

ga_instance = pygad.GA(num_generations=10,
                       sol_per_pop=5,
                       num_parents_mating=2,
                       num_genes=len(equation_inputs),
                       fitness_func=fitness_func,
                       parent_selection_type=parent_selection_func,
                       crossover_type=crossover_func,
                       mutation_type=mutation_func)

ga_instance.run()
ga_instance.plot_fitness()
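For comparison, the same run with PyGAD's built-in operators passes string names instead of callables; the values below are standard PyGAD options roughly equivalent in spirit to the custom functions above (a sketch, not part of the commit):

# Built-in equivalents of the custom operators (sketch).
ga_builtin = pygad.GA(num_generations=10,
                      sol_per_pop=5,
                      num_parents_mating=2,
                      num_genes=len(equation_inputs),
                      fitness_func=fitness_func,
                      parent_selection_type="sss",       # steady-state selection
                      crossover_type="single_point",
                      mutation_type="random")
ga_builtin.run()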

tests/example_logger.py

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
import logging
import pygad
import numpy

level = logging.DEBUG
name = 'logfile.txt'

logger = logging.getLogger(name)
logger.setLevel(level)

file_handler = logging.FileHandler(name, 'a+', 'utf-8')
file_handler.setLevel(logging.DEBUG)
file_format = logging.Formatter('%(asctime)s %(levelname)s: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
file_handler.setFormatter(file_format)
logger.addHandler(file_handler)

console_handler = logging.StreamHandler()
console_handler.setLevel(logging.INFO)
console_format = logging.Formatter('%(message)s')
console_handler.setFormatter(console_format)
logger.addHandler(console_handler)

equation_inputs = [4, -2, 8]
desired_output = 2671.1234

def fitness_func(ga_instance, solution, solution_idx):
    output = numpy.sum(solution * equation_inputs)
    fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001)
    return fitness

def on_generation(ga_instance):
    ga_instance.logger.info("Generation = {generation}".format(generation=ga_instance.generations_completed))
    ga_instance.logger.info("Fitness = {fitness}".format(fitness=ga_instance.best_solution(pop_fitness=ga_instance.last_generation_fitness)[1]))

ga_instance = pygad.GA(num_generations=10,
                       sol_per_pop=40,
                       num_parents_mating=2,
                       keep_parents=2,
                       num_genes=len(equation_inputs),
                       fitness_func=fitness_func,
                       on_generation=on_generation,
                       logger=logger)
ga_instance.run()

logger.handlers.clear()
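The closing logger.handlers.clear() matters because logging.getLogger(name) returns the same logger object on every call, so re-running this setup in the same interpreter would otherwise attach duplicate handlers and repeat every log line. A small illustration of that behavior (hypothetical, not part of the commit):

# Hypothetical illustration: handlers accumulate on a named logger.
import logging

lg1 = logging.getLogger('demo')
lg1.addHandler(logging.StreamHandler())
lg2 = logging.getLogger('demo')          # same object as lg1
print(lg1 is lg2, len(lg2.handlers))     # True 1
lg2.handlers.clear()                     # avoid duplicates on the next setup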
