44
44
_exp_power = 2
45
45
_proc = cpu_count ()
46
46
_cache_maxsize = 1000000
47
- _nbr_topk = 50
47
+ _shortest_path = "all_pairs"
48
+ _nbr_topk = 1000
49
+ _apsp = {}
48
50
49
51
# -------------------------------------------------------
50
52
53
+ @lru_cache (_cache_maxsize )
54
+ def _get_single_node_neighbors_distributions (node , direction = "successors" ):
55
+ """Get the neighbor density distribution of given node `node`.
56
+
57
+ Parameters
58
+ ----------
59
+ node : int
60
+ Node index in Networkit graph `_Gk`.
61
+ direction : {"predecessors", "successors"}
62
+ Direction of neighbors in directed graph. (Default value: "successors")
63
+
64
+ Returns
65
+ -------
66
+ distributions : lists of float
67
+ Density distributions of neighbors up to top `_nbr_topk` nodes.
68
+ nbrs : lists of int
69
+ Neighbor index up to top `_nbr_topk` nodes.
70
+
71
+ """
72
+ if _Gk .isDirected ():
73
+ if direction == "predecessors" :
74
+ neighbors = _Gk .inNeighbors (node )
75
+ else : # successors
76
+ neighbors = _Gk .neighbors (node )
77
+ else :
78
+ neighbors = _Gk .neighbors (node )
79
+
80
+ # Get sum of distributions from x's all neighbors
81
+ heap_weight_node_pair = []
82
+ if direction == "predecessors" :
83
+ for nbr in neighbors :
84
+ if len (heap_weight_node_pair ) < _nbr_topk :
85
+ heapq .heappush (heap_weight_node_pair , (_base ** (- _get_pairwise_sp (nbr , node ) ** _exp_power ), nbr ))
86
+ else :
87
+ heapq .heappushpop (heap_weight_node_pair , (_base ** (- _get_pairwise_sp (nbr , node ) ** _exp_power ), nbr ))
88
+ else : # successors
89
+ for nbr in neighbors :
90
+ if len (heap_weight_node_pair ) < _nbr_topk :
91
+ heapq .heappush (heap_weight_node_pair , (_base ** (- _get_pairwise_sp (node , nbr ) ** _exp_power ), nbr ))
92
+ else :
93
+ heapq .heappushpop (heap_weight_node_pair , (_base ** (- _get_pairwise_sp (node , nbr ) ** _exp_power ), nbr ))
94
+
95
+ nbr_edge_weight_sum = sum ([x [0 ] for x in heap_weight_node_pair ])
96
+
97
+ if nbr_edge_weight_sum > EPSILON :
98
+ # Sum need to be not too small to prevent divided by zero
99
+ distributions = [(1.0 - _alpha ) * w / nbr_edge_weight_sum for w , _ in heap_weight_node_pair ]
100
+ nbr = [x [1 ] for x in heap_weight_node_pair ]
101
+ return distributions + [_alpha ], nbr + [node ]
102
+ elif len (neighbors ) == 0 :
103
+ # No neighbor, all mass stay at node
104
+ return [1 ], [node ]
105
+ else :
106
+ logger .warning ("Neighbor weight sum too small, list:" , heap_weight_node_pair )
107
+ distributions = [(1.0 - _alpha ) / len (heap_weight_node_pair )] * len (heap_weight_node_pair )
108
+ nbr = [x [1 ] for x in heap_weight_node_pair ]
109
+ return distributions + [_alpha ], nbr + [node ]
51
110
52
- def _distribute_densities (source , target , nbr_topk = _nbr_topk ):
111
+
112
+ def _distribute_densities (source , target ):
53
113
"""Get the density distributions of source and target node, and the cost (all pair shortest paths) between
54
- all source's and target's neighbors.
114
+ all source's and target's neighbors. Notice that only neighbors with top `_nbr_topk` edge weights.
55
115
56
116
Parameters
57
117
----------
58
118
source : int
59
119
Source node index in Networkit graph `_Gk`.
60
120
target : int
61
121
Target node index in Networkit graph `_Gk`.
62
- nbr_topk : int
63
- Only take the neighbors with top k edge weights.
64
122
Returns
65
123
-------
66
124
x : (m,) numpy.ndarray
@@ -72,50 +130,14 @@ def _distribute_densities(source, target, nbr_topk=_nbr_topk):
72
130
73
131
"""
74
132
75
- # Append source and target node into weight distribution matrix x,y
133
+ # Distribute densities for source and source's neighbors as x
76
134
if _Gk .isDirected ():
77
- source_nbr = _Gk . inNeighbors (source )
135
+ x , source_topknbr = _get_single_node_neighbors_distributions (source , "predecessors" )
78
136
else :
79
- source_nbr = _Gk .neighbors (source )
80
- target_nbr = _Gk .neighbors (target )
81
-
82
- def _get_single_node_neighbors_distributions (node , neighbors , direction = "successors" ):
83
- # Get sum of distributions from x's all neighbors
84
- heap_weight_node_pair = []
85
- if direction == "predecessors" :
86
- for nbr in neighbors :
87
- if len (heap_weight_node_pair ) < nbr_topk :
88
- heapq .heappush (heap_weight_node_pair , (_base ** (- _get_pairwise_sp (nbr , node ) ** _exp_power ), nbr ))
89
- else :
90
- heapq .heappushpop (heap_weight_node_pair , (_base ** (- _get_pairwise_sp (nbr , node ) ** _exp_power ), nbr ))
91
- else : # successors
92
- for nbr in neighbors :
93
- if len (heap_weight_node_pair ) < nbr_topk :
94
- heapq .heappush (heap_weight_node_pair , (_base ** (- _get_pairwise_sp (node , nbr ) ** _exp_power ), nbr ))
95
- else :
96
- heapq .heappushpop (heap_weight_node_pair , (_base ** (- _get_pairwise_sp (node , nbr ) ** _exp_power ), nbr ))
97
-
98
- nbr_edge_weight_sum = sum ([x [0 ] for x in heap_weight_node_pair ])
99
-
100
- if nbr_edge_weight_sum > EPSILON :
101
- # Sum need to be not too small to prevent divided by zero
102
- distributions = [(1.0 - _alpha ) * w / nbr_edge_weight_sum for w , _ in heap_weight_node_pair ]
103
- nbr = [x [1 ] for x in heap_weight_node_pair ]
104
- return distributions + [_alpha ], nbr + [node ]
105
- elif len (neighbors ) == 0 :
106
- # No neighbor, all mass stay at node
107
- return [1 ], [node ]
108
- else :
109
- logger .warning ("Neighbor weight sum too small, list:" , heap_weight_node_pair )
110
- distributions = [(1.0 - _alpha ) / len (heap_weight_node_pair )] * len (heap_weight_node_pair )
111
- nbr = [x [1 ] for x in heap_weight_node_pair ]
112
- return distributions + [_alpha ], nbr + [node ]
113
-
114
- # Distribute densities for source and source's neighbors as x
115
- x , source_topknbr = _get_single_node_neighbors_distributions (source , source_nbr , "predecessors" )
137
+ x , source_topknbr = _get_single_node_neighbors_distributions (source , "successors" )
116
138
117
139
# Distribute densities for target and target's neighbors as y
118
- y , target_topknbr = _get_single_node_neighbors_distributions (target , target_nbr , "successors" )
140
+ y , target_topknbr = _get_single_node_neighbors_distributions (target , "successors" )
119
141
120
142
# construct the cost dictionary from x to y
121
143
d = np .zeros ((len (x ), len (y )))
@@ -131,7 +153,7 @@ def _get_single_node_neighbors_distributions(node, neighbors, direction="success
131
153
132
154
133
155
@lru_cache (_cache_maxsize )
134
- def _get_pairwise_sp (source , target ):
156
+ def _source_target_shortest_path (source , target ):
135
157
"""Compute pairwise shortest path from `source` to `target` by BidirectionalDijkstra via Networkit.
136
158
137
159
Parameters
@@ -153,6 +175,42 @@ def _get_pairwise_sp(source, target):
153
175
return length
154
176
155
177
178
+ def _get_pairwise_sp (source , target ):
179
+ """Compute pairwise shortest path from `source` to `target`.
180
+
181
+ Parameters
182
+ ----------
183
+ source : int
184
+ Source node index in Networkit graph `_Gk`.
185
+ target : int
186
+ Target node index in Networkit graph `_Gk`.
187
+
188
+ Returns
189
+ -------
190
+ length : float
191
+ Pairwise shortest path length.
192
+
193
+ """
194
+
195
+ if _shortest_path == "pairwise" :
196
+ return _source_target_shortest_path (source , target )
197
+
198
+ return _apsp [source ][target ]
199
+
200
+
201
+ def _get_all_pairs_shortest_path ():
202
+ """Pre-compute all pairs shortest paths of the assigned graph `_Gk`."""
203
+ logger .info ("Start to compute all pair shortest path." )
204
+
205
+ global _Gk
206
+
207
+ t0 = time .time ()
208
+ apsp = nk .distance .APSP (_Gk ).run ().getDistances ()
209
+ logger .info ("%8f secs for all pair by NetworKit." % (time .time () - t0 ))
210
+
211
+ return apsp
212
+
213
+
156
214
def _optimal_transportation_distance (x , y , d ):
157
215
"""Compute the optimal transportation distance (OTD) of the given density distributions by CVXPY.
158
216
@@ -287,12 +345,12 @@ def _compute_ricci_curvature_single_edge(source, target):
287
345
assert _method in ["OTD" , "ATD" , "Sinkhorn" ], \
288
346
'Method %s not found, support method:["OTD", "ATD", "Sinkhorn"]' % _method
289
347
if _method == "OTD" :
290
- x , y , d = _distribute_densities (source , target , _nbr_topk )
348
+ x , y , d = _distribute_densities (source , target )
291
349
m = _optimal_transportation_distance (x , y , d )
292
350
elif _method == "ATD" :
293
351
m = _average_transportation_distance (source , target )
294
352
elif _method == "Sinkhorn" :
295
- x , y , d = _distribute_densities (source , target , _nbr_topk )
353
+ x , y , d = _distribute_densities (source , target )
296
354
m = _sinkhorn_distance (x , y , d )
297
355
298
356
# compute Ricci curvature: k=1-(m_{x,y})/d(x,y)
@@ -310,7 +368,7 @@ def _wrap_compute_single_edge(stuff):
310
368
def _compute_ricci_curvature_edges (G : nx .Graph , weight = "weight" , edge_list = [],
311
369
alpha = 0.5 , method = "OTD" ,
312
370
base = math .e , exp_power = 2 , proc = cpu_count (), chunksize = None , cache_maxsize = 1000000 ,
313
- nbr_topk = 1000 ):
371
+ shortest_path = "all_pairs" , nbr_topk = 1000 ):
314
372
"""Compute Ricci curvature for edges in given edge lists.
315
373
316
374
Parameters
@@ -344,6 +402,8 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
344
402
cache_maxsize : int
345
403
Max size for LRU cache for pairwise shortest path computation.
346
404
Set this to `None` for unlimited cache. (Default value = 1000000)
405
+ shortest_path : {"all_pairs","pairwise"}
406
+ Method to compute shortest path. (Default value = `all_pairs`)
347
407
nbr_topk : int
348
408
Only take the top k edge weight neighbors for density distribution.
349
409
Smaller k run faster but the result is less accurate. (Default value = 1000)
@@ -355,6 +415,9 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
355
415
356
416
"""
357
417
418
+ logger .info ("Number of nodes: %d" % G .number_of_nodes ())
419
+ logger .info ("Number of edges: %d" % G .number_of_edges ())
420
+
358
421
if not nx .get_edge_attributes (G , weight ):
359
422
print ('Edge weight not detected in graph, use "weight" as default edge weight.' )
360
423
for (v1 , v2 ) in G .edges ():
@@ -369,7 +432,9 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
369
432
global _exp_power
370
433
global _proc
371
434
global _cache_maxsize
435
+ global _shortest_path
372
436
global _nbr_topk
437
+ global _apsp
373
438
# -------------------------------------------------------
374
439
375
440
_Gk = nk .nxadapter .nx2nk (G , weightAttr = weight )
@@ -380,6 +445,7 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
380
445
_exp_power = exp_power
381
446
_proc = proc
382
447
_cache_maxsize = cache_maxsize
448
+ _shortest_path = shortest_path
383
449
_nbr_topk = nbr_topk
384
450
385
451
# Construct nx to nk dictionary
@@ -388,6 +454,11 @@ def _compute_ricci_curvature_edges(G: nx.Graph, weight="weight", edge_list=[],
388
454
nx2nk_ndict [n ] = idx
389
455
nk2nx_ndict [idx ] = n
390
456
457
+ if _shortest_path == "all_pairs" :
458
+ # Construct the all pair shortest path dictionary
459
+ if not _apsp :
460
+ _apsp = _get_all_pairs_shortest_path ()
461
+
391
462
if edge_list :
392
463
args = [(nx2nk_ndict [source ], nx2nk_ndict [target ]) for source , target in edge_list ]
393
464
else :
@@ -500,12 +571,11 @@ def _compute_ricci_flow(G: nx.Graph, weight="weight",
500
571
G = nx .Graph (G .subgraph (max (nx .connected_components (G ), key = len )))
501
572
G .remove_edges_from (nx .selfloop_edges (G ))
502
573
503
- logger .info ("Number of nodes: %d" % G .number_of_nodes ())
504
- logger .info ("Number of edges: %d" % G .number_of_edges ())
505
-
506
574
# Set normalized weight to be the number of edges.
507
575
normalized_weight = float (G .number_of_edges ())
508
576
577
+ global _apsp
578
+
509
579
# Start compute edge Ricci flow
510
580
t0 = time .time ()
511
581
@@ -517,6 +587,10 @@ def _compute_ricci_flow(G: nx.Graph, weight="weight",
517
587
for (v1 , v2 ) in G .edges ():
518
588
G [v1 ][v2 ]["original_RC" ] = G [v1 ][v2 ]["ricciCurvature" ]
519
589
590
+ # clear the APSP since the graph have changed.
591
+ _apsp = {}
592
+
593
+
520
594
# Start the Ricci flow process
521
595
for i in range (iterations ):
522
596
for (v1 , v2 ) in G .edges ():
@@ -550,7 +624,10 @@ def _compute_ricci_flow(G: nx.Graph, weight="weight",
550
624
normalized_weight = float (G .number_of_edges ())
551
625
552
626
for n1 , n2 in G .edges ():
553
- logger .debug (n1 , n2 , G [n1 ][n2 ])
627
+ logger .debug ("%s %s %s" % (n1 , n2 , G [n1 ][n2 ]))
628
+
629
+ # clear the APSP since the graph have changed.
630
+ _apsp = {}
554
631
555
632
logger .info ("\n %8f secs for Ricci flow computation." % (time .time () - t0 ))
556
633
@@ -564,7 +641,8 @@ class OllivierRicci:
564
641
"""
565
642
566
643
def __init__ (self , G : nx .Graph , weight = "weight" , alpha = 0.5 , method = "OTD" ,
567
- base = math .e , exp_power = 2 , proc = cpu_count (), chunksize = None , cache_maxsize = 1000000 ,
644
+ base = math .e , exp_power = 2 , proc = cpu_count (), chunksize = None , shortest_path = "all_pairs" ,
645
+ cache_maxsize = 1000000 ,
568
646
nbr_topk = 1000 , verbose = "ERROR" ):
569
647
"""Initialized a container to compute Ollivier-Ricci curvature/flow.
570
648
@@ -596,13 +674,15 @@ def __init__(self, G: nx.Graph, weight="weight", alpha=0.5, method="OTD",
596
674
Number of processor used for multiprocessing. (Default value = `cpu_count()`)
597
675
chunksize : int
598
676
Chunk size for multiprocessing, set None for auto decide. (Default value = `None`)
677
+ shortest_path : {"all_pairs","pairwise"}
678
+ Method to compute shortest path. (Default value = `all_pairs`)
599
679
cache_maxsize : int
600
680
Max size for LRU cache for pairwise shortest path computation.
601
681
Set this to `None` for unlimited cache. (Default value = 1000000)
602
682
nbr_topk : int
603
683
Only take the top k edge weight neighbors for density distribution.
604
684
Smaller k run faster but the result is less accurate. (Default value = 1000)
605
- verbose: {"INFO","DEBUG","ERROR"}
685
+ verbose : {"INFO","DEBUG","ERROR"}
606
686
Verbose level. (Default value = "ERROR")
607
687
- "INFO": show only iteration process log.
608
688
- "DEBUG": show all output logs.
@@ -618,6 +698,7 @@ def __init__(self, G: nx.Graph, weight="weight", alpha=0.5, method="OTD",
618
698
self .proc = proc
619
699
self .chunksize = chunksize
620
700
self .cache_maxsize = cache_maxsize
701
+ self .shortest_path = shortest_path
621
702
self .nbr_topk = nbr_topk
622
703
623
704
self .set_verbose (verbose )
@@ -658,7 +739,8 @@ def compute_ricci_curvature_edges(self, edge_list=None):
658
739
alpha = self .alpha , method = self .method ,
659
740
base = self .base , exp_power = self .exp_power ,
660
741
proc = self .proc , chunksize = self .chunksize ,
661
- cache_maxsize = self .cache_maxsize , nbr_topk = self .nbr_topk )
742
+ cache_maxsize = self .cache_maxsize , shortest_path = self .shortest_path ,
743
+ nbr_topk = self .nbr_topk )
662
744
663
745
def compute_ricci_curvature (self ):
664
746
"""Compute Ricci curvature of edges and nodes.
@@ -684,6 +766,7 @@ def compute_ricci_curvature(self):
684
766
alpha = self .alpha , method = self .method ,
685
767
base = self .base , exp_power = self .exp_power ,
686
768
proc = self .proc , chunksize = self .chunksize , cache_maxsize = self .cache_maxsize ,
769
+ shortest_path = self .shortest_path ,
687
770
nbr_topk = self .nbr_topk )
688
771
return self .G
689
772
@@ -724,5 +807,5 @@ def compute_ricci_flow(self, iterations=10, step=1, delta=1e-4, surgery=(lambda
724
807
alpha = self .alpha , method = self .method ,
725
808
base = self .base , exp_power = self .exp_power ,
726
809
proc = self .proc , chunksize = self .chunksize , cache_maxsize = self .cache_maxsize ,
727
- nbr_topk = self .nbr_topk )
810
+ shortest_path = self . shortest_path , nbr_topk = self .nbr_topk )
728
811
return self .G
0 commit comments