24
24
LOGGER = getLogger (__name__ )
25
25
"""The logger for the package."""
26
26
27
- bounds_tol = np . finfo ( np . float32 ). eps
27
+ bounds_tol = 1e-6
28
28
"""The tolerance used for checking bounds feasibility."""
29
29
30
- feasibility_tol = bounds_tol
30
+ feasibility_tol = 1e-6
31
31
"""The tolerance used for checking equalities feasibility."""
32
32
33
- nproj = 1000000
34
- """Reproject the solution into the feasibility space every nproj iterations."""
35
-
36
- nproj_center = 10000
37
- """Reproject the center into the nullspace every nproj_center iterations.
38
- Only used for inhomogeneous problems."""
33
+ max_tries = 100
34
+ """Maximum number of retries for sampling."""
39
35
40
36
Problem = namedtuple ("Problem" ,
41
37
["equalities" , "b" , "inequalities" , "bounds" ,
@@ -106,7 +102,7 @@ def shared_np_array(shape, data=None, integer=False):
106
102
107
103
108
104
# Has to be declared outside of class to be used for multiprocessing :(
109
- def _step (sampler , x , delta , fraction = None ):
105
+ def _step (sampler , x , delta , fraction = None , tries = 0 ):
110
106
"""Sample a new feasible point from the point `x` in direction `delta`."""
111
107
prob = sampler .problem
112
108
valid = ((np .abs (delta ) > feasibility_tol ) &
@@ -116,14 +112,19 @@ def _step(sampler, x, delta, fraction=None):
116
112
valphas = (valphas / delta [valid ]).flatten ()
117
113
if prob .bounds .shape [0 ] > 0 :
118
114
# permissible alphas for staying in constraint bounds
115
+ ineqs = prob .inequalities .dot (delta )
116
+ valid = np .abs (ineqs ) > feasibility_tol
119
117
balphas = ((1.0 - bounds_tol ) * prob .bounds -
120
- prob .inequalities .dot (x ))
121
- balphas = (balphas / prob . inequalities . dot ( delta ) ).flatten ()
118
+ prob .inequalities .dot (x ))[:, valid ]
119
+ balphas = (balphas / ineqs [ valid ] ).flatten ()
122
120
# combined alphas
123
121
alphas = np .hstack ([valphas , balphas ])
124
122
else :
125
123
alphas = valphas
126
- alpha_range = (alphas [alphas > 0.0 ].min (), alphas [alphas <= 0.0 ].max ())
124
+ pos_alphas = alphas [alphas > 0.0 ]
125
+ neg_alphas = alphas [alphas <= 0.0 ]
126
+ alpha_range = np .array ([neg_alphas .max () if len (neg_alphas ) > 0 else 0 ,
127
+ pos_alphas .min () if len (pos_alphas ) > 0 else 0 ])
127
128
128
129
if fraction :
129
130
alpha = alpha_range [0 ] + fraction * (alpha_range [1 ] - alpha_range [0 ])
@@ -133,12 +134,21 @@ def _step(sampler, x, delta, fraction=None):
133
134
134
135
# Numerical instabilities may cause bounds invalidation
135
136
# reset sampler and sample from one of the original warmup directions
136
- # if that occurs
137
- if np .any (sampler ._bounds_dist (p ) < - bounds_tol ):
137
+ # if that occurs. Also reset if we got stuck.
138
+ if (np .any (sampler ._bounds_dist (p ) < - bounds_tol ) or
139
+ np .abs (np .abs (alpha_range ).max () * delta ).max () < bounds_tol ):
140
+ if tries > max_tries :
141
+ raise RuntimeError ("Can not escape sampling region, model seems"
142
+ " numerically unstable :( Reporting the "
143
+ "model to "
144
+ "https://github.com/opencobra/cobrapy/issues "
145
+ "will help us to fix this :)" )
138
146
LOGGER .info ("found bounds infeasibility in sample, "
139
147
"resetting to center" )
140
148
newdir = sampler .warmup [np .random .randint (sampler .n_warmup )]
141
- return _step (sampler , sampler .center , newdir - sampler .center )
149
+ sampler .retries += 1
150
+ return _step (sampler , sampler .center , newdir - sampler .center , None ,
151
+ tries + 1 )
142
152
return p
143
153
144
154
@@ -152,6 +162,13 @@ class HRSampler(object):
152
162
thinning : int
153
163
The thinning factor of the generated sampling chain. A thinning of 10
154
164
means samples are returned every 10 steps.
165
+ nproj : int > 0, optional
166
+ How often to reproject the sampling point into the feasibility space.
167
+ Avoids numerical issues at the cost of lower sampling. If you observe
168
+ many equality constraint violations with `sampler.validate` you should
169
+ lower this number.
170
+ seed : int > 0, optional
171
+ The random number seed that should be used.
155
172
156
173
Attributes
157
174
----------
@@ -162,13 +179,18 @@ class HRSampler(object):
162
179
n_samples : int
163
180
The total number of samples that have been generated by this
164
181
sampler instance.
182
+ retries : int
183
+ The overall of sampling retries the sampler has observed. Larger
184
+ values indicate numerical instabilities.
165
185
problem : collections.namedtuple
166
186
A python object whose attributes define the entire sampling problem in
167
187
matrix form. See docstring of `Problem`.
168
188
warmup : a numpy matrix
169
189
A matrix of with as many columns as reactions in the model and more
170
190
than 3 rows containing a warmup sample in each row. None if no warmup
171
191
points have been generated yet.
192
+ nproj : int
193
+ How often to reproject the sampling point into the feasibility space.
172
194
seed : positive integer, optional
173
195
Sets the random number seed. Initialized to the current time stamp if
174
196
None.
@@ -180,7 +202,7 @@ class HRSampler(object):
180
202
the respective reverse variable.
181
203
"""
182
204
183
- def __init__ (self , model , thinning , seed = None ):
205
+ def __init__ (self , model , thinning , nproj = None , seed = None ):
184
206
"""Initialize a new sampler object."""
185
207
# This currently has to be done to reset the solver basis which is
186
208
# required to get deterministic warmup point generation
@@ -189,7 +211,12 @@ def __init__(self, model, thinning, seed=None):
189
211
raise TypeError ("sampling does not work with integer problems :(" )
190
212
self .model = model .copy ()
191
213
self .thinning = thinning
214
+ if nproj is None :
215
+ self .nproj = int (min (len (self .model .variables )** 3 , 1e6 ))
216
+ else :
217
+ self .nproj = nproj
192
218
self .n_samples = 0
219
+ self .retries = 0
193
220
self .problem = self .__build_problem ()
194
221
# Set up a map from reaction -> forward/reverse variable
195
222
var_idx = {v : idx for idx , v in enumerate (model .variables )}
@@ -252,32 +279,54 @@ def generate_fva_warmup(self):
252
279
if necessary).
253
280
"""
254
281
self .n_warmup = 0
255
- idx = np . hstack ([ self .fwd_idx , self . rev_idx ])
256
- self .warmup = np .zeros ((len (idx ), len (self .model .variables )))
282
+ reactions = self .model . reactions
283
+ self .warmup = np .zeros ((2 * len (reactions ), len (self .model .variables )))
257
284
self .model .objective = Zero
258
- self .model .objective .direction = "max"
259
- variables = self .model .variables
260
- for i in idx :
261
- # Omit fixed reactions
262
- if self .problem .variable_fixed [i ]:
263
- LOGGER .info ("skipping fixed variable %s" %
264
- variables [i ].name )
265
- continue
266
- self .model .objective .set_linear_coefficients ({variables [i ]: 1 })
267
- self .model .slim_optimize ()
268
- if not self .model .solver .status == OPTIMAL :
269
- LOGGER .info ("can not maximize variable %s, skipping it" %
270
- variables [i ].name )
271
- continue
272
- primals = self .model .solver .primal_values
273
- sol = [primals [v .name ] for v in self .model .variables ]
274
- self .warmup [self .n_warmup , ] = sol
285
+ for sense in ("min" , "max" ):
286
+ self .model .objective_direction = sense
287
+ for i , r in enumerate (reactions ):
288
+ variables = (self .model .variables [self .fwd_idx [i ]],
289
+ self .model .variables [self .rev_idx [i ]])
290
+ # Omit fixed reactions if they are non-homogeneous
291
+ if r .upper_bound - r .lower_bound < bounds_tol :
292
+ LOGGER .info ("skipping fixed reaction %s" % r .id )
293
+ continue
294
+ self .model .objective .set_linear_coefficients (
295
+ {variables [0 ]: 1 , variables [1 ]: - 1 })
296
+ self .model .slim_optimize ()
297
+ if not self .model .solver .status == OPTIMAL :
298
+ LOGGER .info ("can not maximize reaction %s, skipping it" %
299
+ r .id )
300
+ continue
301
+ primals = self .model .solver .primal_values
302
+ sol = [primals [v .name ] for v in self .model .variables ]
303
+ self .warmup [self .n_warmup , ] = sol
304
+ self .n_warmup += 1
305
+ # Reset objective
306
+ self .model .objective .set_linear_coefficients (
307
+ {variables [0 ]: 0 , variables [1 ]: 0 })
308
+ # Shrink to measure
309
+ self .warmup = self .warmup [0 :self .n_warmup , :]
310
+ # Remove redundant search directions
311
+ keep = np .logical_not (self ._is_redundant (self .warmup ))
312
+ self .warmup = self .warmup [keep , :]
313
+ self .n_warmup = self .warmup .shape [0 ]
314
+
315
+ # Catch some special cases
316
+ if len (self .warmup .shape ) == 1 or self .warmup .shape [0 ] == 1 :
317
+ raise ValueError ("Your flux cone consists only of a single point!" )
318
+ elif self .n_warmup == 2 :
319
+ if not self .problem .homogeneous :
320
+ raise ValueError ("Can not sample from an inhomogenous problem"
321
+ " with only 2 search directions :(" )
322
+ LOGGER .info ("All search directions on a line, adding another one." )
323
+ newdir = self .warmup .T .dot ([0.25 , 0.25 ])
324
+ self .warmup = np .vstack ([self .warmup , newdir ])
275
325
self .n_warmup += 1
276
- # revert objective
277
- self .model .objective .set_linear_coefficients ({variables [i ]: 0 })
326
+
278
327
# Shrink warmup points to measure
279
- self .warmup = shared_np_array (( self . n_warmup , len ( variables )),
280
- self .warmup [ 0 : self .n_warmup , ] )
328
+ self .warmup = shared_np_array (
329
+ ( self .n_warmup , len ( self .model . variables )), self . warmup )
281
330
282
331
def _reproject (self , p ):
283
332
"""Reproject a point into the feasibility region.
@@ -307,14 +356,28 @@ def _reproject(self, p):
307
356
new = nulls .dot (nulls .T .dot (p ))
308
357
# Projections may violate bounds
309
358
# set to random point in space in that case
310
- if any (self . _bounds_dist ( new ) < - bounds_tol ):
359
+ if any (new != p ):
311
360
LOGGER .info ("reprojection failed in sample"
312
361
" %d, using random point in space" % self .n_samples )
313
- idx = np .random .randint (self .n_warmup ,
314
- size = min (2 , int (np .sqrt (self .n_warmup ))))
315
- new = self .warmup [idx , :].mean (axis = 0 )
362
+ new = self ._random_point ()
316
363
return new
317
364
365
+ def _random_point (self ):
366
+ """Find an approximately random point in the flux cone."""
367
+ idx = np .random .randint (self .n_warmup ,
368
+ size = min (2 , np .ceil (np .sqrt (self .n_warmup ))))
369
+ return self .warmup [idx , :].mean (axis = 0 )
370
+
371
+ def _is_redundant (self , matrix , cutoff = 1.0 - feasibility_tol ):
372
+ """Identify rdeundant rows in a matrix that can be removed."""
373
+ # Avoid zero variances
374
+ extra_col = matrix [:, 0 ] + 1
375
+ # Avoid zero rows being correlated with constant rows
376
+ extra_col [matrix .sum (axis = 1 ) == 0 ] = 2
377
+ corr = np .corrcoef (np .c_ [matrix , extra_col ])
378
+ corr = np .tril (corr , - 1 )
379
+ return (np .abs (corr ) > cutoff ).any (axis = 1 )
380
+
318
381
def _bounds_dist (self , p ):
319
382
"""Get the lower and upper bound distances. Negative is bad."""
320
383
prob = self .problem
@@ -445,7 +508,12 @@ class ACHRSampler(HRSampler):
445
508
thinning : int, optional
446
509
The thinning factor of the generated sampling chain. A thinning of 10
447
510
means samples are returned every 10 steps.
448
- seed : positive integer, optional
511
+ nproj : int > 0, optional
512
+ How often to reproject the sampling point into the feasibility space.
513
+ Avoids numerical issues at the cost of lower sampling. If you observe
514
+ many equality constraint violations with `sampler.validate` you should
515
+ lower this number.
516
+ seed : int > 0, optional
449
517
Sets the random number seed. Initialized to the current time stamp if
450
518
None.
451
519
@@ -465,9 +533,14 @@ class ACHRSampler(HRSampler):
465
533
A matrix of with as many columns as reactions in the model and more
466
534
than 3 rows containing a warmup sample in each row. None if no warmup
467
535
points have been generated yet.
536
+ retries : int
537
+ The overall of sampling retries the sampler has observed. Larger
538
+ values indicate numerical instabilities.
468
539
seed : positive integer, optional
469
540
Sets the random number seed. Initialized to the current time stamp if
470
541
None.
542
+ nproj : int
543
+ How often to reproject the sampling point into the feasibility space.
471
544
fwd_idx : np.array
472
545
Has one entry for each reaction in the model containing the index of
473
546
the respective forward variable.
@@ -503,9 +576,10 @@ class ACHRSampler(HRSampler):
503
576
.. [2] https://github.com/opencobra/cobratoolbox
504
577
"""
505
578
506
- def __init__ (self , model , thinning = 100 , seed = None ):
579
+ def __init__ (self , model , thinning = 100 , nproj = None , seed = None ):
507
580
"""Initialize a new ACHRSampler."""
508
- super (ACHRSampler , self ).__init__ (model , thinning , seed = seed )
581
+ super (ACHRSampler , self ).__init__ (model , thinning , nproj = nproj ,
582
+ seed = seed )
509
583
self .generate_fva_warmup ()
510
584
self .prev = self .center = self .warmup .mean (axis = 0 )
511
585
np .random .seed (self ._seed )
@@ -516,10 +590,11 @@ def __single_iteration(self):
516
590
delta = self .warmup [pi , ] - self .center
517
591
self .prev = _step (self , self .prev , delta )
518
592
if self .problem .homogeneous and (self .n_samples *
519
- self .thinning % nproj == 0 ):
593
+ self .thinning % self . nproj == 0 ):
520
594
self .prev = self ._reproject (self .prev )
521
- self .center = (self .n_samples * self .center + self .prev ) / (
522
- self .n_samples + 1 )
595
+ self .center = self ._reproject (self .center )
596
+ self .center = ((self .n_samples * self .center ) / (self .n_samples + 1 ) +
597
+ self .prev / (self .n_samples + 1 ))
523
598
self .n_samples += 1
524
599
525
600
def sample (self , n , fluxes = True ):
@@ -584,15 +659,17 @@ def _sample_chain(args):
584
659
delta = sampler .warmup [pi , ] - center
585
660
586
661
prev = _step (sampler , prev , delta )
587
- if sampler .problem .homogeneous and (n_samples *
588
- sampler .thinning % nproj == 0 ):
662
+ if sampler .problem .homogeneous and (
663
+ n_samples * sampler .thinning % sampler . nproj == 0 ):
589
664
prev = sampler ._reproject (prev )
665
+ center = sampler ._reproject (center )
590
666
if i % sampler .thinning == 0 :
591
667
samples [i // sampler .thinning - 1 , ] = prev
592
- center = (n_samples * center + prev ) / (n_samples + 1 )
593
- n_samples += 1
668
+ center = ((n_samples * center ) / (n_samples + 1 ) +
669
+ prev / (n_samples + 1 ))
670
+ n_samples += 1
594
671
595
- return samples
672
+ return ( sampler . retries , samples )
596
673
597
674
598
675
class OptGPSampler (HRSampler ):
@@ -610,7 +687,12 @@ class OptGPSampler(HRSampler):
610
687
thinning : int, optional
611
688
The thinning factor of the generated sampling chain. A thinning of 10
612
689
means samples are returned every 10 steps.
613
- seed : positive integer, optional
690
+ nproj : int > 0, optional
691
+ How often to reproject the sampling point into the feasibility space.
692
+ Avoids numerical issues at the cost of lower sampling. If you observe
693
+ many equality constraint violations with `sampler.validate` you should
694
+ lower this number.
695
+ seed : int > 0, optional
614
696
Sets the random number seed. Initialized to the current time stamp if
615
697
None.
616
698
@@ -630,9 +712,14 @@ class OptGPSampler(HRSampler):
630
712
A matrix of with as many columns as reactions in the model and more
631
713
than 3 rows containing a warmup sample in each row. None if no warmup
632
714
points have been generated yet.
715
+ retries : int
716
+ The overall of sampling retries the sampler has observed. Larger
717
+ values indicate numerical instabilities.
633
718
seed : positive integer, optional
634
719
Sets the random number seed. Initialized to the current time stamp if
635
720
None.
721
+ nproj : int
722
+ How often to reproject the sampling point into the feasibility space.
636
723
fwd_idx : np.array
637
724
Has one entry for each reaction in the model containing the index of
638
725
the respective forward variable.
@@ -672,7 +759,8 @@ class OptGPSampler(HRSampler):
672
759
https://doi.org/10.1371/journal.pone.0086587
673
760
"""
674
761
675
- def __init__ (self , model , processes , thinning = 100 , seed = None ):
762
+ def __init__ (self , model , processes , thinning = 100 , nproj = None ,
763
+ seed = None ):
676
764
"""Initialize a new OptGPSampler."""
677
765
super (OptGPSampler , self ).__init__ (model , thinning , seed = seed )
678
766
self .generate_fva_warmup ()
@@ -725,18 +813,19 @@ def sample(self, n, fluxes=True):
725
813
# No with statement or starmap here since Python 2.x
726
814
# does not support it :(
727
815
mp = Pool (self .processes , initializer = mp_init , initargs = (self ,))
728
- chains = mp .map (_sample_chain , args , chunksize = 1 )
816
+ results = mp .map (_sample_chain , args , chunksize = 1 )
729
817
mp .close ()
730
818
mp .join ()
731
- chains = np .vstack (chains )
819
+ chains = np .vstack ([r [1 ] for r in results ])
820
+ self .retries += sum (r [0 ] for r in results )
732
821
else :
733
822
mp_init (self )
734
- chains = _sample_chain ((n , 0 ))
823
+ results = _sample_chain ((n , 0 ))
824
+ chains = results [1 ]
735
825
736
826
# Update the global center
737
827
self .center = (self .n_samples * self .center +
738
- n * np .atleast_2d (chains ).mean (axis = 0 )) / (
739
- self .n_samples + n )
828
+ np .atleast_2d (chains ).sum (0 )) / (self .n_samples + n )
740
829
self .n_samples += n
741
830
742
831
if fluxes :
0 commit comments