Skip to content

Commit db43d65

Browse files
authored
Merge pull request #26 from saibalmars/drop_cvxpy
Drop cvxpy
2 parents db226c4 + 175f761 commit db43d65

File tree

11 files changed

+236
-302
lines changed

11 files changed

+236
-302
lines changed

.travis.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,4 +108,12 @@ after_success:
108108
- codecov # submit coverage
109109

110110
notifications:
111-
email: false
111+
email: false
112+
113+
deploy:
114+
provider: pypi
115+
username: saibalmars
116+
password: EEXSQQztCfaaXTzPLjpPehXF12nYo17LBvc0BLD40Err3K42uKOmXaWAO6k6Q2dFwKctTvCWGpYgMN1/07SIspEeLr1ivewcdlAXk5XzD1gFuN0i/BHbvs7siNB2wf5qeIqHL8dw9UZrDLfAAuoHrDlLfBf/rbbFFBQHWgJVQOY47XZbfbuMaAdPSLY2ffBnd84fADt+wsmdE3kDw/jIiTsXP8Xj04ZNGmFghI+5KzHxyMaUv0eCrAkyVoVHdlJQvhQHJJrveMGOkt61KaSHmf4j12wqxaJGQcX+1521kk5VtU2QF9q3dYkqqzDk0gSv9FXQJ6OQJiF1Di/pi1q9SZ+CRMnMQSlmyg5dD/OYuYtXN17vA03uxBo7xSlqOurrdNiS+Tw8zmtf/iJs5Ace+fRnk0btnu+sEQ4lJbJEtFdq0PTGviHnccCSzyI3Vwk6xAros7aqH0PRYkDRFR1+EOGG46SatlIfv3VN3eiYwOyQU062g9n9zl9HPh5oTzEq1z1TKfgsN4leWljwbtphfekXZ7kcWsJ1y2NS8fbE3hG1AwPqgyULvaKDJAwNL74KjaPGhIyUUaLqW8y9H3QlL/LmVoXTYqSxtfUz/caweojyS29mGnQ2P045h/RmzEbBnkuf6X1+xA8A5s2vFKIjktVVGXiX6wc90ZzTAXK84II=
117+
edge: true # opt in to dpl v2
118+
on:
119+
tags: true

GraphRicciCurvature/OllivierRicci.py

Lines changed: 36 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
import time
2626
from functools import lru_cache
2727

28-
import cvxpy as cvx
2928
import networkit as nk
3029
import networkx as nx
3130
import numpy as np
@@ -39,13 +38,14 @@
3938
_Gk = nk.graph.Graph()
4039
_alpha = 0.5
4140
_weight = "weight"
42-
_method = "Sinkhorn"
41+
_method = "OTDSinkhornMix"
4342
_base = math.e
4443
_exp_power = 2
4544
_proc = mp.cpu_count()
4645
_cache_maxsize = 1000000
4746
_shortest_path = "all_pairs"
48-
_nbr_topk = 1000
47+
_nbr_topk = 3000
48+
_OTDSinkhorn_threshold = 2000
4949
_apsp = {}
5050

5151

@@ -157,8 +157,8 @@ def _distribute_densities(source, target):
157157
else: # all_pairs
158158
d = _apsp[np.ix_(source_topknbr, target_topknbr)] # transportation matrix
159159

160-
x = np.array([x]).T # the mass that source neighborhood initially owned
161-
y = np.array([y]).T # the mass that target neighborhood needs to received
160+
x = np.array(x) # the mass that source neighborhood initially owned
161+
y = np.array(y) # the mass that target neighborhood needs to received
162162

163163
logger.debug("%8f secs density matrix construction for edge." % (time.time() - t0))
164164

@@ -221,20 +221,9 @@ def _optimal_transportation_distance(x, y, d):
221221
"""
222222

223223
t0 = time.time()
224-
rho = cvx.Variable((len(y), len(x))) # the transportation plan rho
225-
226-
# objective function d(x,y) * rho * x, need to do element-wise multiply here
227-
obj = cvx.Minimize(cvx.sum(cvx.multiply(np.multiply(d.T, x.T), rho)))
228-
229-
# \sigma_i rho_{ij}=[1,1,...,1]
230-
source_sum = cvx.sum(rho, axis=0, keepdims=True)
231-
constrains = [rho @ x == y, source_sum == np.ones((1, (len(x)))), 0 <= rho, rho <= 1]
232-
prob = cvx.Problem(obj, constrains)
233-
234-
m = prob.solve() # change solver here if you want
235-
# solve for optimal transportation cost
236-
237-
logger.debug("%8f secs for cvxpy. \t#source_nbr: %d, #target_nbr: %d" % (time.time() - t0, len(x), len(y)))
224+
m = ot.emd2(x, y, d)
225+
logger.debug(
226+
"%8f secs for Wasserstein dist. \t#source_nbr: %d, #target_nbr: %d" % (time.time() - t0, len(x), len(y)))
238227

239228
return m
240229

@@ -260,7 +249,7 @@ def _sinkhorn_distance(x, y, d):
260249
t0 = time.time()
261250
m = ot.sinkhorn2(x, y, d, 1e-1, method='sinkhorn')[0]
262251
logger.debug(
263-
"%8f secs for Sinkhorn. dist. \t#source_nbr: %d, #target_nbr: %d" % (time.time() - t0, len(x), len(y)))
252+
"%8f secs for Sinkhorn dist. \t#source_nbr: %d, #target_nbr: %d" % (time.time() - t0, len(x), len(y)))
264253

265254
return m
266255

@@ -326,14 +315,14 @@ def _compute_ricci_curvature_single_edge(source, target):
326315

327316
# If the weight of edge is too small, return 0 instead.
328317
if _Gk.weight(source, target) < EPSILON:
329-
logger.warning("Zero weight edge detected for edge (%s,%s), return Ricci Curvature as 0 instead." %
318+
logger.trace("Zero weight edge detected for edge (%s,%s), return Ricci Curvature as 0 instead." %
330319
(source, target))
331320
return {(source, target): 0}
332321

333322
# compute transportation distance
334323
m = 1 # assign an initial cost
335-
assert _method in ["OTD", "ATD", "Sinkhorn"], \
336-
'Method %s not found, support method:["OTD", "ATD", "Sinkhorn"]' % _method
324+
assert _method in ["OTD", "ATD", "Sinkhorn", "OTDSinkhornMix"], \
325+
'Method %s not found, support method:["OTD", "ATD", "Sinkhorn", "OTDSinkhornMix]' % _method
337326
if _method == "OTD":
338327
x, y, d = _distribute_densities(source, target)
339328
m = _optimal_transportation_distance(x, y, d)
@@ -342,6 +331,14 @@ def _compute_ricci_curvature_single_edge(source, target):
342331
elif _method == "Sinkhorn":
343332
x, y, d = _distribute_densities(source, target)
344333
m = _sinkhorn_distance(x, y, d)
334+
elif _method == "OTDSinkhornMix":
335+
x, y, d = _distribute_densities(source, target)
336+
# When x and y are small (usually around 2000 to 3000), ot.emd2 is way faster than ot.sinkhorn2
337+
# So we only do sinkhorn when both x and y are too large for ot.emd2
338+
if len(x) > _OTDSinkhorn_threshold and len(y) > _OTDSinkhorn_threshold:
339+
m = _sinkhorn_distance(x, y, d)
340+
else:
341+
m = _optimal_transportation_distance(x, y, d)
345342

346343
# compute Ricci curvature: k=1-(m_{x,y})/d(x,y)
347344
result = 1 - (m / _Gk.weight(source, target)) # Divided by the length of d(i, j)
@@ -356,9 +353,9 @@ def _wrap_compute_single_edge(stuff):
356353

357354

358355
def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
359-
alpha=0.5, method="OTD",
356+
alpha=0.5, method="OTDSinkhornMix",
360357
base=math.e, exp_power=2, proc=mp.cpu_count(), chunksize=None, cache_maxsize=1000000,
361-
shortest_path="all_pairs", nbr_topk=1000):
358+
shortest_path="all_pairs", nbr_topk=3000):
362359
"""Compute Ricci curvature for edges in given edge lists.
363360
364361
Parameters
@@ -375,12 +372,14 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
375372
E.g. x -> y, alpha = 0.4 means 0.4 for x, 0.6 to evenly spread to x's nbr.
376373
(Default value = 0.5)
377374
method : {"OTD", "ATD", "Sinkhorn"}
378-
The optimal transportation distance computation method. (Default value = "OTD")
375+
The optimal transportation distance computation method. (Default value = "OTDSinkhornMix")
379376
380377
Transportation method:
381378
- "OTD" for Optimal Transportation Distance,
382379
- "ATD" for Average Transportation Distance.
383-
- "Sinkhorn" for OTD approximated Sinkhorn distance. (faster)
380+
- "Sinkhorn" for OTD approximated Sinkhorn distance.
381+
- "OTDSinkhornMix" use OTD for nodes of edge with less than _OTDSinkhorn_threshold(default 2000) neighbors,
382+
use Sinkhorn for faster computation with nodes of edge more neighbors. (OTD is faster for smaller cases)
384383
base : float
385384
Base variable for weight distribution. (Default value = `math.e`)
386385
exp_power : float
@@ -396,7 +395,7 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
396395
Method to compute shortest path. (Default value = `all_pairs`)
397396
nbr_topk : int
398397
Only take the top k edge weight neighbors for density distribution.
399-
Smaller k run faster but the result is less accurate. (Default value = 1000)
398+
Smaller k run faster but the result is less accurate. (Default value = 3000)
400399
401400
Returns
402401
-------
@@ -625,10 +624,10 @@ class OllivierRicci:
625624
626625
"""
627626

628-
def __init__(self, G: nx.Graph, weight="weight", alpha=0.5, method="Sinkhorn",
627+
def __init__(self, G: nx.Graph, weight="weight", alpha=0.5, method="OTDSinkhornMix",
629628
base=math.e, exp_power=2, proc=mp.cpu_count(), chunksize=None, shortest_path="all_pairs",
630629
cache_maxsize=1000000,
631-
nbr_topk=1000, verbose="ERROR"):
630+
nbr_topk=3000, verbose="ERROR"):
632631
"""Initialized a container to compute Ollivier-Ricci curvature/flow.
633632
634633
Parameters
@@ -643,12 +642,14 @@ def __init__(self, G: nx.Graph, weight="weight", alpha=0.5, method="Sinkhorn",
643642
E.g. x -> y, alpha = 0.4 means 0.4 for x, 0.6 to evenly spread to x's nbr.
644643
(Default value = 0.5)
645644
method : {"OTD", "ATD", "Sinkhorn"}
646-
The optimal transportation distance computation method. (Default value = "Sinkhorn")
645+
The optimal transportation distance computation method. (Default value = "OTDSinkhornMix")
647646
648647
Transportation method:
649648
- "OTD" for Optimal Transportation Distance,
650649
- "ATD" for Average Transportation Distance.
651-
- "Sinkhorn" for OTD approximated Sinkhorn distance. (faster)
650+
- "Sinkhorn" for OTD approximated Sinkhorn distance.
651+
- "OTDSinkhornMix" use OTD for nodes of edge with less than _OTDSinkhorn_threshold(default 2000) neighbors,
652+
use Sinkhorn for faster computation with nodes of edge more neighbors. (OTD is faster for smaller cases)
652653
base : float
653654
Base variable for weight distribution. (Default value = `math.e`)
654655
exp_power : float
@@ -664,7 +665,7 @@ def __init__(self, G: nx.Graph, weight="weight", alpha=0.5, method="Sinkhorn",
664665
Set this to `None` for unlimited cache. (Default value = 1000000)
665666
nbr_topk : int
666667
Only take the top k edge weight neighbors for density distribution.
667-
Smaller k run faster but the result is less accurate. (Default value = 1000)
668+
Smaller k run faster but the result is less accurate. (Default value = 3000)
668669
verbose : {"INFO", "TRACE","DEBUG","ERROR"}
669670
Verbose level. (Default value = "ERROR")
670671
- "INFO": show only iteration process log.
@@ -765,13 +766,13 @@ def compute_ricci_curvature(self):
765766
nbr_topk=self.nbr_topk)
766767
return self.G
767768

768-
def compute_ricci_flow(self, iterations=20, step=1, delta=1e-4, surgery=(lambda G, *args, **kwargs: G, 100)):
769+
def compute_ricci_flow(self, iterations=10, step=1, delta=1e-4, surgery=(lambda G, *args, **kwargs: G, 100)):
769770
"""Compute the given Ricci flow metric of each edge of a given connected NetworkX graph.
770771
771772
Parameters
772773
----------
773774
iterations : int
774-
Iterations to require Ricci flow metric. (Default value = 100)
775+
Iterations to require Ricci flow metric. (Default value = 10)
775776
step : float
776777
Step size for gradient decent process. (Default value = 1)
777778
delta : float

GraphRicciCurvature/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
__version__ = "0.5.2.1"
1+
__version__ = "0.5.3"
22
__author__ = "Chien-Chun Ni"
33
__email__ = "saibalmars@gmail.com"

README.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,8 @@ Video demonstration of Ricci flow for community detection:
3535

3636
* [NetworkX](https://github.com/networkx/networkx) >= 2.0 (Based Graph library)
3737
* [NetworKit](https://github.com/kit-parco/networkit) >= 6.1 (Shortest path algorithm)
38-
* [CVXPY](https://github.com/cvxgrp/cvxpy) (LP solver for Optimal transportation)
39-
* [NumPy](https://github.com/numpy/numpy) (CVXPY support)
40-
* [POT](https://github.com/rflamary/POT) (For approximate Optimal transportation distance)
38+
* [NumPy](https://github.com/numpy/numpy) (POT support)
39+
* [POT](https://github.com/rflamary/POT) (For optimal transportation distance)
4140
* [python-louvain](https://github.com/taynaud/python-louvain) (For faster modularity computation)
4241

4342

binder/environment.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ dependencies:
77
- pot
88
- scikit-learn
99
- matplotlib
10-
- cvxpy
1110
- pip
1211
- pip:
1312
- GraphRicciCurvature

doc/source/conf.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545
autodoc_mock_imports = [
4646
'numpy',
4747
'scipy',
48-
'cvxpy',
4948
'networkx',
5049
'ot',
5150
'networkit',

doc/source/readme.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,7 @@ Package Requirement
5353

5454
* `NetworkX <https://github.com/networkx/networkx>`__ (Based Graph library)
5555
* `NetworKit <https://github.com/kit-parco/networkit>`__ (Shortest path algorithm)
56-
* `CVXPY <https://github.com/cvxgrp/cvxpy>`__ (LP solver for Optimal transportation)
57-
* `NumPy <https://github.com/numpy/numpy>`__ (CVXPY support)
56+
* `NumPy <https://github.com/numpy/numpy>`__ (POT support)
5857
* `POT <https://github.com/rflamary/POT>`__ (For approximate Optimal transportation distance)
5958
* `python-louvain <https://github.com/taynaud/python-louvain>`__ (For faster modularity computation)
6059

0 commit comments

Comments
 (0)