25
25
import time
26
26
from functools import lru_cache
27
27
28
- import cvxpy as cvx
29
28
import networkit as nk
30
29
import networkx as nx
31
30
import numpy as np
39
38
_Gk = nk .graph .Graph ()
40
39
_alpha = 0.5
41
40
_weight = "weight"
42
- _method = "Sinkhorn "
41
+ _method = "OTDSinkhornMix "
43
42
_base = math .e
44
43
_exp_power = 2
45
44
_proc = mp .cpu_count ()
46
45
_cache_maxsize = 1000000
47
46
_shortest_path = "all_pairs"
48
- _nbr_topk = 1000
47
+ _nbr_topk = 3000
48
+ _OTDSinkhorn_threshold = 2000
49
49
_apsp = {}
50
50
51
51
@@ -157,8 +157,8 @@ def _distribute_densities(source, target):
157
157
else : # all_pairs
158
158
d = _apsp [np .ix_ (source_topknbr , target_topknbr )] # transportation matrix
159
159
160
- x = np .array ([ x ]). T # the mass that source neighborhood initially owned
161
- y = np .array ([ y ]). T # the mass that target neighborhood needs to received
160
+ x = np .array (x ) # the mass that source neighborhood initially owned
161
+ y = np .array (y ) # the mass that target neighborhood needs to received
162
162
163
163
logger .debug ("%8f secs density matrix construction for edge." % (time .time () - t0 ))
164
164
@@ -221,20 +221,9 @@ def _optimal_transportation_distance(x, y, d):
221
221
"""
222
222
223
223
t0 = time .time ()
224
- rho = cvx .Variable ((len (y ), len (x ))) # the transportation plan rho
225
-
226
- # objective function d(x,y) * rho * x, need to do element-wise multiply here
227
- obj = cvx .Minimize (cvx .sum (cvx .multiply (np .multiply (d .T , x .T ), rho )))
228
-
229
- # \sigma_i rho_{ij}=[1,1,...,1]
230
- source_sum = cvx .sum (rho , axis = 0 , keepdims = True )
231
- constrains = [rho @ x == y , source_sum == np .ones ((1 , (len (x )))), 0 <= rho , rho <= 1 ]
232
- prob = cvx .Problem (obj , constrains )
233
-
234
- m = prob .solve () # change solver here if you want
235
- # solve for optimal transportation cost
236
-
237
- logger .debug ("%8f secs for cvxpy. \t #source_nbr: %d, #target_nbr: %d" % (time .time () - t0 , len (x ), len (y )))
224
+ m = ot .emd2 (x , y , d )
225
+ logger .debug (
226
+ "%8f secs for Wasserstein dist. \t #source_nbr: %d, #target_nbr: %d" % (time .time () - t0 , len (x ), len (y )))
238
227
239
228
return m
240
229
@@ -260,7 +249,7 @@ def _sinkhorn_distance(x, y, d):
260
249
t0 = time .time ()
261
250
m = ot .sinkhorn2 (x , y , d , 1e-1 , method = 'sinkhorn' )[0 ]
262
251
logger .debug (
263
- "%8f secs for Sinkhorn. dist. \t #source_nbr: %d, #target_nbr: %d" % (time .time () - t0 , len (x ), len (y )))
252
+ "%8f secs for Sinkhorn dist. \t #source_nbr: %d, #target_nbr: %d" % (time .time () - t0 , len (x ), len (y )))
264
253
265
254
return m
266
255
@@ -326,14 +315,14 @@ def _compute_ricci_curvature_single_edge(source, target):
326
315
327
316
# If the weight of edge is too small, return 0 instead.
328
317
if _Gk .weight (source , target ) < EPSILON :
329
- logger .warning ("Zero weight edge detected for edge (%s,%s), return Ricci Curvature as 0 instead." %
318
+ logger .trace ("Zero weight edge detected for edge (%s,%s), return Ricci Curvature as 0 instead." %
330
319
(source , target ))
331
320
return {(source , target ): 0 }
332
321
333
322
# compute transportation distance
334
323
m = 1 # assign an initial cost
335
- assert _method in ["OTD" , "ATD" , "Sinkhorn" ], \
336
- 'Method %s not found, support method:["OTD", "ATD", "Sinkhorn"]' % _method
324
+ assert _method in ["OTD" , "ATD" , "Sinkhorn" , "OTDSinkhornMix" ], \
325
+ 'Method %s not found, support method:["OTD", "ATD", "Sinkhorn", "OTDSinkhornMix ]' % _method
337
326
if _method == "OTD" :
338
327
x , y , d = _distribute_densities (source , target )
339
328
m = _optimal_transportation_distance (x , y , d )
@@ -342,6 +331,14 @@ def _compute_ricci_curvature_single_edge(source, target):
342
331
elif _method == "Sinkhorn" :
343
332
x , y , d = _distribute_densities (source , target )
344
333
m = _sinkhorn_distance (x , y , d )
334
+ elif _method == "OTDSinkhornMix" :
335
+ x , y , d = _distribute_densities (source , target )
336
+ # When x and y are small (usually around 2000 to 3000), ot.emd2 is way faster than ot.sinkhorn2
337
+ # So we only do sinkhorn when both x and y are too large for ot.emd2
338
+ if len (x ) > _OTDSinkhorn_threshold and len (y ) > _OTDSinkhorn_threshold :
339
+ m = _sinkhorn_distance (x , y , d )
340
+ else :
341
+ m = _optimal_transportation_distance (x , y , d )
345
342
346
343
# compute Ricci curvature: k=1-(m_{x,y})/d(x,y)
347
344
result = 1 - (m / _Gk .weight (source , target )) # Divided by the length of d(i, j)
@@ -356,9 +353,9 @@ def _wrap_compute_single_edge(stuff):
356
353
357
354
358
355
def _compute_ricci_curvature_edges (G : nx .Graph , weight = "weight" , edge_list = [],
359
- alpha = 0.5 , method = "OTD " ,
356
+ alpha = 0.5 , method = "OTDSinkhornMix " ,
360
357
base = math .e , exp_power = 2 , proc = mp .cpu_count (), chunksize = None , cache_maxsize = 1000000 ,
361
- shortest_path = "all_pairs" , nbr_topk = 1000 ):
358
+ shortest_path = "all_pairs" , nbr_topk = 3000 ):
362
359
"""Compute Ricci curvature for edges in given edge lists.
363
360
364
361
Parameters
@@ -375,12 +372,14 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
375
372
E.g. x -> y, alpha = 0.4 means 0.4 for x, 0.6 to evenly spread to x's nbr.
376
373
(Default value = 0.5)
377
374
method : {"OTD", "ATD", "Sinkhorn"}
378
- The optimal transportation distance computation method. (Default value = "OTD ")
375
+ The optimal transportation distance computation method. (Default value = "OTDSinkhornMix ")
379
376
380
377
Transportation method:
381
378
- "OTD" for Optimal Transportation Distance,
382
379
- "ATD" for Average Transportation Distance.
383
- - "Sinkhorn" for OTD approximated Sinkhorn distance. (faster)
380
+ - "Sinkhorn" for OTD approximated Sinkhorn distance.
381
+ - "OTDSinkhornMix" use OTD for nodes of edge with less than _OTDSinkhorn_threshold(default 2000) neighbors,
382
+ use Sinkhorn for faster computation with nodes of edge more neighbors. (OTD is faster for smaller cases)
384
383
base : float
385
384
Base variable for weight distribution. (Default value = `math.e`)
386
385
exp_power : float
@@ -396,7 +395,7 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
396
395
Method to compute shortest path. (Default value = `all_pairs`)
397
396
nbr_topk : int
398
397
Only take the top k edge weight neighbors for density distribution.
399
- Smaller k run faster but the result is less accurate. (Default value = 1000 )
398
+ Smaller k run faster but the result is less accurate. (Default value = 3000 )
400
399
401
400
Returns
402
401
-------
@@ -625,10 +624,10 @@ class OllivierRicci:
625
624
626
625
"""
627
626
628
- def __init__ (self , G : nx .Graph , weight = "weight" , alpha = 0.5 , method = "Sinkhorn " ,
627
+ def __init__ (self , G : nx .Graph , weight = "weight" , alpha = 0.5 , method = "OTDSinkhornMix " ,
629
628
base = math .e , exp_power = 2 , proc = mp .cpu_count (), chunksize = None , shortest_path = "all_pairs" ,
630
629
cache_maxsize = 1000000 ,
631
- nbr_topk = 1000 , verbose = "ERROR" ):
630
+ nbr_topk = 3000 , verbose = "ERROR" ):
632
631
"""Initialized a container to compute Ollivier-Ricci curvature/flow.
633
632
634
633
Parameters
@@ -643,12 +642,14 @@ def __init__(self, G: nx.Graph, weight="weight", alpha=0.5, method="Sinkhorn",
643
642
E.g. x -> y, alpha = 0.4 means 0.4 for x, 0.6 to evenly spread to x's nbr.
644
643
(Default value = 0.5)
645
644
method : {"OTD", "ATD", "Sinkhorn"}
646
- The optimal transportation distance computation method. (Default value = "Sinkhorn ")
645
+ The optimal transportation distance computation method. (Default value = "OTDSinkhornMix ")
647
646
648
647
Transportation method:
649
648
- "OTD" for Optimal Transportation Distance,
650
649
- "ATD" for Average Transportation Distance.
651
- - "Sinkhorn" for OTD approximated Sinkhorn distance. (faster)
650
+ - "Sinkhorn" for OTD approximated Sinkhorn distance.
651
+ - "OTDSinkhornMix" use OTD for nodes of edge with less than _OTDSinkhorn_threshold(default 2000) neighbors,
652
+ use Sinkhorn for faster computation with nodes of edge more neighbors. (OTD is faster for smaller cases)
652
653
base : float
653
654
Base variable for weight distribution. (Default value = `math.e`)
654
655
exp_power : float
@@ -664,7 +665,7 @@ def __init__(self, G: nx.Graph, weight="weight", alpha=0.5, method="Sinkhorn",
664
665
Set this to `None` for unlimited cache. (Default value = 1000000)
665
666
nbr_topk : int
666
667
Only take the top k edge weight neighbors for density distribution.
667
- Smaller k run faster but the result is less accurate. (Default value = 1000 )
668
+ Smaller k run faster but the result is less accurate. (Default value = 3000 )
668
669
verbose : {"INFO", "TRACE","DEBUG","ERROR"}
669
670
Verbose level. (Default value = "ERROR")
670
671
- "INFO": show only iteration process log.
@@ -765,13 +766,13 @@ def compute_ricci_curvature(self):
765
766
nbr_topk = self .nbr_topk )
766
767
return self .G
767
768
768
- def compute_ricci_flow (self , iterations = 20 , step = 1 , delta = 1e-4 , surgery = (lambda G , * args , ** kwargs : G , 100 )):
769
+ def compute_ricci_flow (self , iterations = 10 , step = 1 , delta = 1e-4 , surgery = (lambda G , * args , ** kwargs : G , 100 )):
769
770
"""Compute the given Ricci flow metric of each edge of a given connected NetworkX graph.
770
771
771
772
Parameters
772
773
----------
773
774
iterations : int
774
- Iterations to require Ricci flow metric. (Default value = 100 )
775
+ Iterations to require Ricci flow metric. (Default value = 10 )
775
776
step : float
776
777
Step size for gradient decent process. (Default value = 1)
777
778
delta : float
0 commit comments