jajupmochi
diff --git a/‎.appveyor.yml
Lines changed: 3 additions & 5 deletions b/‎.appveyor.yml
Lines changed: 3 additions & 5 deletions
diff --git a/‎.travis.yml
Lines changed: 0 additions & 1 deletion b/‎.travis.yml
Lines changed: 0 additions & 1 deletion
diff --git a/‎README.md
Lines changed: 13 additions & 13 deletions b/‎README.md
Lines changed: 13 additions & 13 deletions
diff --git a/‎gklearn/examples/ged/__init__.py b/‎gklearn/examples/ged/__init__.py
diff --git a/‎gklearn/examples/compute_graph_edit_distance.py renamed to ‎gklearn/examples/ged/compute_graph_edit_distance.py b/‎gklearn/examples/compute_graph_edit_distance.py renamed to ‎gklearn/examples/ged/compute_graph_edit_distance.py
diff --git a/‎gklearn/examples/kernels/__init__.py b/‎gklearn/examples/kernels/__init__.py
diff --git a/‎gklearn/examples/compute_distance_in_kernel_space.py renamed to ‎gklearn/examples/kernels/compute_distance_in_kernel_space.py b/‎gklearn/examples/compute_distance_in_kernel_space.py renamed to ‎gklearn/examples/kernels/compute_distance_in_kernel_space.py
diff --git a/‎gklearn/examples/compute_graph_kernel.py renamed to ‎gklearn/examples/kernels/compute_graph_kernel.py b/‎gklearn/examples/compute_graph_kernel.py renamed to ‎gklearn/examples/kernels/compute_graph_kernel.py
diff --git a/‎gklearn/examples/kernels/compute_graph_kernel_old.py
Lines changed: 31 additions & 0 deletions b/‎gklearn/examples/kernels/compute_graph_kernel_old.py
Lines changed: 31 additions & 0 deletions
diff --git a/‎gklearn/examples/kernels/model_selection_old.py
Lines changed: 38 additions & 0 deletions b/‎gklearn/examples/kernels/model_selection_old.py
Lines changed: 38 additions & 0 deletions
diff --git a/‎gklearn/examples/preimage/__init__.py b/‎gklearn/examples/preimage/__init__.py
diff --git a/‎gklearn/examples/median_preimege_generator.py renamed to ‎gklearn/examples/preimage/median_preimege_generator.py b/‎gklearn/examples/median_preimege_generator.py renamed to ‎gklearn/examples/preimage/median_preimege_generator.py
diff --git a/‎gklearn/examples/preimage/median_preimege_generator_cml.py
Lines changed: 113 additions & 0 deletions b/‎gklearn/examples/preimage/median_preimege_generator_cml.py
Lines changed: 113 additions & 0 deletions
diff --git a/‎gklearn/examples/preimage/median_preimege_generator_py.py
Lines changed: 114 additions & 0 deletions b/‎gklearn/examples/preimage/median_preimege_generator_py.py
Lines changed: 114 additions & 0 deletions
@@ -1,7 +1,5 @@
 environment:
   matrix:
-    - PYTHON: "C:\\Python35"
-    - PYTHON: "C:\\Python35-x64"
     - PYTHON: "C:\\Python36"
     - PYTHON: "C:\\Python36-x64"
     - PYTHON: "C:\\Python37"
@@ -17,12 +15,12 @@ environment:
 
 install:
   - "%PYTHON%\\python.exe -m pip install -U pip"
-  - "%PYTHON%\\python.exe -m pip install -U pytest"
-  - "%PYTHON%\\python.exe -m pip install -r requirements.txt"
   - "%PYTHON%\\python.exe -m pip install wheel"
+  - "%PYTHON%\\python.exe -m pip install -r requirements.txt"
+  - "%PYTHON%\\python.exe -m pip install -U pytest"
 
 build: off
 
 test_script:
   - "%PYTHON%\\python.exe setup.py bdist_wheel"
-  - "%PYTHON%\\python.exe -m pytest -v gklearn/tests/"
+  - "%PYTHON%\\python.exe -m pytest -v gklearn/tests/ --ignore=gklearn/tests/test_median_preimage_generator.py"
@@ -1,7 +1,6 @@
 language: python
 
 python:
-- '3.5'
 - '3.6'
 - '3.7'
 - '3.8'
 
@@ -9,7 +9,7 @@ A Python package for graph kernels, graph edit distances and graph pre-image pro
 
 ## Requirements
 
-* python>=3.5
+* python>=3.6
 * numpy>=1.16.2
 * scipy>=1.1.0
 * matplotlib>=3.1.0
@@ -65,27 +65,27 @@ The docs of the library can be found [here](https://graphkit-learn.readthedocs.i
 ### 1 List of graph kernels
 
 * Based on walks
-  * [The common walk kernel](gklearn/kernels/common_walk.py) [1]
+  * [The common walk kernel](https://github.com/jajupmochi/graphkit-learn/tree/master/gklearn/kernels/common_walk.py) [1]
     * Exponential
     * Geometric
-  * [The marginalized kenrel](gklearn/kernels/marginalized.py)
+  * [The marginalized kernel](https://github.com/jajupmochi/graphkit-learn/tree/master/gklearn/kernels/marginalized.py)
     * With tottering [2]
     * Without tottering [7]
-  * [The generalized random walk kernel](gklearn/kernels/random_walk.py) [3]
-    * [Sylvester equation](gklearn/kernels/sylvester_equation.py)
+  * [The generalized random walk kernel](https://github.com/jajupmochi/graphkit-learn/tree/master/gklearn/kernels/random_walk.py) [3]
+    * [Sylvester equation](https://github.com/jajupmochi/graphkit-learn/tree/master/gklearn/kernels/sylvester_equation.py)
     * Conjugate gradient
     * Fixed-point iterations
-    * [Spectral decomposition](gklearn/kernels/spectral_decomposition.py)
+    * [Spectral decomposition](https://github.com/jajupmochi/graphkit-learn/tree/master/gklearn/kernels/spectral_decomposition.py)
 * Based on paths
-  * [The shortest path kernel](gklearn/kernels/shortest_path.py) [4]
-  * [The structural shortest path kernel](gklearn/kernels/structural_sp.py) [5]
-  * [The path kernel up to length h](gklearn/kernels/path_up_to_h.py) [6]
+  * [The shortest path kernel](https://github.com/jajupmochi/graphkit-learn/tree/master/gklearn/kernels/shortest_path.py) [4]
+  * [The structural shortest path kernel](https://github.com/jajupmochi/graphkit-learn/tree/master/gklearn/kernels/structural_sp.py) [5]
+  * [The path kernel up to length h](https://github.com/jajupmochi/graphkit-learn/tree/master/gklearn/kernels/path_up_to_h.py) [6]
     * The Tanimoto kernel
     * The MinMax kernel
 * Non-linear kernels
-  * [The treelet kernel](gklearn/kernels/treelet.py) [10]
-  * [Weisfeiler-Lehman kernel](gklearn/kernels/weisfeiler_lehman.py) [11]
-    * [Subtree](gklearn/kernels/weisfeiler_lehman.py#L479)
+  * [The treelet kernel](https://github.com/jajupmochi/graphkit-learn/tree/master/gklearn/kernels/treelet.py) [10]
+  * [Weisfeiler-Lehman kernel](https://github.com/jajupmochi/graphkit-learn/tree/master/gklearn/kernels/weisfeiler_lehman.py) [11]
+    * [Subtree](https://github.com/jajupmochi/graphkit-learn/tree/master/gklearn/kernels/weisfeiler_lehman.py#L479)
 
 A demo of computing graph kernels can be found on [Google Colab](https://colab.research.google.com/drive/17Q2QCl9CAtDweGF8LiWnWoN2laeJqT0u?usp=sharing) and in the [`examples`](https://github.com/jajupmochi/graphkit-learn/blob/master/gklearn/examples/compute_graph_kernel.py) folder.
 
@@ -97,7 +97,7 @@ A demo of generating graph preimages can be found on [Google Colab](https://cola
 
 ### 4 Interface to `GEDLIB`
 
-[`GEDLIB`](https://github.com/dbblumenthal/gedlib) is an easily extensible C++ library for (suboptimally) computing the graph edit distance between attributed graphs. [A Python interface](gklearn/gedlib) for `GEDLIB` is integrated in this library, based on [`gedlibpy`](https://github.com/Ryurin/gedlibpy) library.
+[`GEDLIB`](https://github.com/dbblumenthal/gedlib) is an easily extensible C++ library for (suboptimally) computing the graph edit distance between attributed graphs. [A Python interface](https://github.com/jajupmochi/graphkit-learn/tree/master/gklearn/gedlib) for `GEDLIB` is integrated in this library, based on [`gedlibpy`](https://github.com/Ryurin/gedlibpy) library.
 
 ### 5 Computation optimization methods
 
 
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+"""compute_graph_kernel_v0.1.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/10jUz7-ahPiE_T1qvFrh2NvCVs1e47noj
+
+**This script demonstrates how to compute a graph kernel.**
+---
+
+**0.   Install `graphkit-learn`.**
+"""
+
+"""**1.   Get dataset.**"""
+
+from gklearn.utils.graphfiles import loadDataset
+
+graphs, targets = loadDataset('../../../datasets/MUTAG/MUTAG_A.txt')
+
+"""**2.  Compute graph kernel.**"""
+
+from gklearn.kernels import untilhpathkernel
+
+gram_matrix, run_time = untilhpathkernel(
+	graphs, # The list of input graphs.
+	depth=5, # The longest length of paths.
+	k_func='MinMax', # Or 'tanimoto'.
+	compute_method='trie', # Or 'naive'.
+	n_jobs=1, # The number of jobs to run in parallel.
+	verbose=True)
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+"""model_selection_old.ipynb
+
+Automatically generated by Colaboratory.
+
+Original file is located at
+    https://colab.research.google.com/drive/1uVkl7scNgEPrimX8ks6iEC5ijuhB8L_D
+
+**This script demonstrates how to perform model selection and classification with a graph kernel.**
+---
+
+**0.   Install `graphkit-learn`.**
+"""
+
+"""**1. Perform model selection and classification.**"""
+
+from gklearn.utils import model_selection_for_precomputed_kernel
+from gklearn.kernels import untilhpathkernel
+import numpy as np
+
+# Set parameters.
+datafile = '../../../datasets/MUTAG/MUTAG_A.txt'
+param_grid_precomputed = {'depth': np.linspace(1, 10, 10),
+                          'k_func': ['MinMax', 'tanimoto'],
+                          'compute_method': ['trie']}
+param_grid = {'C': np.logspace(-10, 10, num=41, base=10)}
+
+# Perform model selection and classification.
+model_selection_for_precomputed_kernel(
+	datafile, # The path of dataset file.
+	untilhpathkernel, # The graph kernel used for estimation.
+	param_grid_precomputed, # The parameters used to compute gram matrices.
+	param_grid, # The penalty parameters used for the penalty terms.
+	'classification', # Or 'regression'.
+	NUM_TRIALS=30, # The number of the random trials of the outer CV loop.
+	ds_name='MUTAG', # The name of the dataset.
+	n_jobs=1,
+	verbose=True)
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Jun 16 15:41:26 2020
+
+@author: ljia
+
+**This script demonstrates how to generate a graph preimage using Boria's method with cost matrices learning.**
+"""
+
+"""**1.   Get dataset.**"""
+
+from gklearn.utils import Dataset, split_dataset_by_target
+
+# Predefined dataset name, use dataset "MAO".
+ds_name = 'MAO'
+# The node/edge labels that will not be used in the computation.
+irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
+
+# Initialize a Dataset.
+dataset_all = Dataset()
+# Load predefined dataset "MAO".
+dataset_all.load_predefined_dataset(ds_name)
+# Remove irrelevant labels.
+dataset_all.remove_labels(**irrelevant_labels)
+# Split the whole dataset according to the classification targets.
+datasets = split_dataset_by_target(dataset_all)
+# Get the first class of graphs, whose median preimage will be computed.
+dataset = datasets[0]
+len(dataset.graphs)
+
+"""**2.  Set parameters.**"""
+
+import multiprocessing
+
+# Parameters for MedianPreimageGenerator (our method).
+mpg_options = {'init_method': 'random', # how to initialize node label cost vector. "random" means to initialize randomly.
+			   'init_ecc': [4, 4, 2, 1, 1, 1], # initial edit costs.
+			   'ds_name': ds_name, # name of the dataset.
+			   'parallel': True, # @todo: whether the parallel scheme is to be used.
+			   'time_limit_in_sec': 0, # maximum time limit to compute the preimage. If set to 0 then no limit.
+			   'max_itrs': 3, # maximum iteration limit to optimize edit costs. If set to 0 then no limit.
+			   'max_itrs_without_update': 3, # If the number of times the edit costs are not updated exceeds this number, then the optimization stops.
+			   'epsilon_residual': 0.01, # In optimization, the residual is only considered changed if the change is bigger than this number.
+			   'epsilon_ec': 0.1, # In optimization, the edit costs are only considered changed if the changes are bigger than this number.
+			   'verbose': 2 # whether to print out results.
+               }
+# Parameters for graph kernel computation.
+kernel_options = {'name': 'PathUpToH', # use path kernel up to length h.
+				  'depth': 9,
+				  'k_func': 'MinMax',
+				  'compute_method': 'trie',
+				  'parallel': 'imap_unordered', # or None
+				  'n_jobs': multiprocessing.cpu_count(),
+				  'normalize': True, # whether to use normalized Gram matrix to optimize edit costs.
+				  'verbose': 2 # whether to print out results.
+                  }
+# Parameters for GED computation.
+ged_options = {'method': 'BIPARTITE', # use Bipartite heuristic.
+			   'initialization_method': 'RANDOM', # or 'NODE', etc.
+			   'initial_solutions': 10, # when bigger than 1, then the method is considered mIPFP.
+			   'edit_cost': 'CONSTANT', # @todo: not needed. use CONSTANT cost.
+			   'attr_distance': 'euclidean', # @todo: not needed. the distance between non-symbolic node/edge labels is computed by euclidean distance.
+			   'ratio_runs_from_initial_solutions': 1,
+			   'threads': multiprocessing.cpu_count(), # parallel threads. Does not work if mpg_options['parallel'] = False.
+			   'init_option': 'LAZY_WITHOUT_SHUFFLED_COPIES' # 'EAGER_WITHOUT_SHUFFLED_COPIES'
+               }
+# Parameters for MedianGraphEstimator (Boria's method).
+mge_options = {'init_type': 'MEDOID', # how to initialize the median (compute set-median). "MEDOID" means to use the graph with the smallest SOD.
+			   'random_inits': 10, # number of random initialization when 'init_type' = 'RANDOM'.
+			   'time_limit': 600, # maximum time limit to compute the generalized median. If set to 0 then no limit.
+			   'verbose': 2, # whether to print out results.
+			   'refine': False # whether to refine the final SODs or not.
+               }
+print('done.')
+
+"""**3.   Run median preimage generator.**"""
+
+from gklearn.preimage import MedianPreimageGeneratorCML
+
+# Create median preimage generator instance.
+mpg = MedianPreimageGeneratorCML()
+# Add dataset.
+mpg.dataset = dataset
+# Set parameters.
+mpg.set_options(**mpg_options.copy())
+mpg.kernel_options = kernel_options.copy()
+mpg.ged_options = ged_options.copy()
+mpg.mge_options = mge_options.copy()
+# Run.
+mpg.run()
+
+"""**4. Get results.**"""
+
+# Get results.
+import pprint
+pp = pprint.PrettyPrinter(indent=4) # pretty print
+results = mpg.get_results()
+pp.pprint(results)
+
+# Draw generated graphs.
+def draw_graph(graph):
+	import matplotlib.pyplot as plt
+	import networkx as nx
+	plt.figure()
+	pos = nx.spring_layout(graph)
+	nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'atom_symbol'), font_color='w', width=3, with_labels=True)
+	plt.show()
+	plt.clf()
+	plt.close()
+ 
+draw_graph(mpg.set_median)
+draw_graph(mpg.gen_median)
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Jun 16 15:41:26 2020
+
+@author: ljia
+
+**This script demonstrates how to generate a graph preimage using Boria's method with cost matrices learning.**
+"""
+
+"""**1.   Get dataset.**"""
+
+from gklearn.utils import Dataset, split_dataset_by_target
+
+# Predefined dataset name, use dataset "MAO".
+ds_name = 'MAO'
+# The node/edge labels that will not be used in the computation.
+irrelevant_labels = {'node_attrs': ['x', 'y', 'z'], 'edge_labels': ['bond_stereo']}
+
+# Initialize a Dataset.
+dataset_all = Dataset()
+# Load predefined dataset "MAO".
+dataset_all.load_predefined_dataset(ds_name)
+# Remove irrelevant labels.
+dataset_all.remove_labels(**irrelevant_labels)
+# Split the whole dataset according to the classification targets.
+datasets = split_dataset_by_target(dataset_all)
+# Get the first class of graphs, whose median preimage will be computed.
+dataset = datasets[0]
+# dataset.cut_graphs(range(0, 10))
+len(dataset.graphs)
+
+"""**2.  Set parameters.**"""
+
+import multiprocessing
+
+# Parameters for MedianPreimageGenerator (our method).
+mpg_options = {'fit_method': 'k-graphs', # how to fit edit costs. "k-graphs" means use all graphs in median set when fitting.
+			   'init_ecc': [4, 4, 2, 1, 1, 1], # initial edit costs.
+			   'ds_name': ds_name, # name of the dataset.
+			   'parallel': True, # @todo: whether the parallel scheme is to be used.
+			   'time_limit_in_sec': 0, # maximum time limit to compute the preimage. If set to 0 then no limit.
+			   'max_itrs': 100, # maximum iteration limit to optimize edit costs. If set to 0 then no limit.
+			   'max_itrs_without_update': 3, # If the number of times the edit costs are not updated exceeds this number, then the optimization stops.
+			   'epsilon_residual': 0.01, # In optimization, the residual is only considered changed if the change is bigger than this number.
+			   'epsilon_ec': 0.1, # In optimization, the edit costs are only considered changed if the changes are bigger than this number.
+			   'verbose': 2 # whether to print out results.
+               }
+# Parameters for graph kernel computation.
+kernel_options = {'name': 'PathUpToH', # use path kernel up to length h.
+				  'depth': 9,
+				  'k_func': 'MinMax',
+				  'compute_method': 'trie',
+				  'parallel': 'imap_unordered', # or None
+				  'n_jobs': multiprocessing.cpu_count(),
+				  'normalize': True, # whether to use normalized Gram matrix to optimize edit costs.
+				  'verbose': 2 # whether to print out results.
+                  }
+# Parameters for GED computation.
+ged_options = {'method': 'BIPARTITE', # use Bipartite heuristic.
+			   'initialization_method': 'RANDOM', # or 'NODE', etc.
+			   'initial_solutions': 10, # when bigger than 1, then the method is considered mIPFP.
+			   'edit_cost': 'CONSTANT', # use CONSTANT cost.
+			   'attr_distance': 'euclidean', # the distance between non-symbolic node/edge labels is computed by euclidean distance.
+			   'ratio_runs_from_initial_solutions': 1,
+			   'threads': multiprocessing.cpu_count(), # parallel threads. Does not work if mpg_options['parallel'] = False.
+			   'init_option': 'LAZY_WITHOUT_SHUFFLED_COPIES' # 'EAGER_WITHOUT_SHUFFLED_COPIES'
+               }
+# Parameters for MedianGraphEstimator (Boria's method).
+mge_options = {'init_type': 'MEDOID', # how to initialize the median (compute set-median). "MEDOID" means to use the graph with the smallest SOD.
+			   'random_inits': 10, # number of random initialization when 'init_type' = 'RANDOM'.
+			   'time_limit': 600, # maximum time limit to compute the generalized median. If set to 0 then no limit.
+			   'verbose': 2, # whether to print out results.
+			   'refine': False # whether to refine the final SODs or not.
+               }
+print('done.')
+
+"""**3.   Run median preimage generator.**"""
+
+from gklearn.preimage import MedianPreimageGeneratorPy
+
+# Create median preimage generator instance.
+mpg = MedianPreimageGeneratorPy()
+# Add dataset.
+mpg.dataset = dataset
+# Set parameters.
+mpg.set_options(**mpg_options.copy())
+mpg.kernel_options = kernel_options.copy()
+mpg.ged_options = ged_options.copy()
+mpg.mge_options = mge_options.copy()
+# Run.
+mpg.run()
+
+"""**4. Get results.**"""
+
+# Get results.
+import pprint
+pp = pprint.PrettyPrinter(indent=4) # pretty print
+results = mpg.get_results()
+pp.pprint(results)
+
+# Draw generated graphs.
+def draw_graph(graph):
+	import matplotlib.pyplot as plt
+	import networkx as nx
+	plt.figure()
+	pos = nx.spring_layout(graph)
+	nx.draw(graph, pos, node_size=500, labels=nx.get_node_attributes(graph, 'atom_symbol'), font_color='w', width=3, with_labels=True)
+	plt.show()
+	plt.clf()
+	plt.close()
+ 
+draw_graph(mpg.set_median)
+draw_graph(mpg.gen_median)
-Original file line number
+Diff line change
@@ @@ -1,7 +1,6 @@ @@
 language: python
 python:
 -- '3.5'
 - '3.6'
 - '3.7'
 - '3.8'