@@ -306,6 +306,77 @@ def loadDatabase(self):
306
306
with open (cache_hash_file ,'w' ) as f :
307
307
f .write (database_hash )
308
308
309
def loadThermoDatabase(self):
    """
    Load only the thermodynamics part of the RMG database.

    The data is loaded from the ``thermo`` subdirectory of
    self.databaseDirectory, according to the setting in:

    * self.thermoLibraries

    The thermo depository is not loaded.

    A SHA-1 hash is computed from the database directory path and checksum,
    self.thermoLibraries, the rmgpy.data source hash and the source of this
    class.  If self.outputDirectory contains :file:`database.pkl` and
    :file:`database.hash` files, and both the hash file and the hash stored
    inside the pickled database match the computed hash, the pickled
    database is used as a cache and nothing is reloaded.  Otherwise any
    stale cache files are deleted, the thermo database is loaded from
    source, and a fresh cache (pickle and hash) is saved.

    Side effects: sets self.database; publishes the cache file path and the
    hash through ``scoop.shared.setConst`` (as ``databaseFile`` and
    ``databaseHash``); when the cache is used, also stores the database in
    the module-level variable ``rmgpy.data.rmg.database``.
    """
    import inspect
    import hashlib
    import cPickle
    import rmgpy.utilities
    import scoop.shared

    # Make a hash of everything that could alter the contents of the database once it is fully loaded.
    # Then we can compare this hash to the cached file to see if the cache is valid.
    database_metadata = {
        'path': self.databaseDirectory,
        'database hash': rmgpy.utilities.path_checksum([self.databaseDirectory]),
        'thermoLibraries': self.thermoLibraries,
        'rmgpy.data source hash': rmgpy.data.getSourceHash(),
        'this source hash': hashlib.sha1(inspect.getsource(self.__class__)).hexdigest(),
    }
    database_hash = hashlib.sha1(cPickle.dumps(database_metadata)).hexdigest()

    # NOTE(review): these look like the same cache file names the full
    # database loader uses; if so the two caches will keep invalidating
    # each other -- confirm against loadDatabase.
    cache_hash_file = os.path.join(self.outputDirectory, 'database.hash')
    cache_pickle_file = os.path.join(self.outputDirectory, 'database.pkl')
    scoop.shared.setConst(databaseFile=cache_pickle_file, databaseHash=database_hash)

    if not os.path.exists(cache_pickle_file):
        logging.info("Couldn't find a database cache file {0!r} so will reload from source.".format(cache_pickle_file))
    elif not os.path.exists(cache_hash_file):
        logging.info("Couldn't find database cache hash file {0!r} to validate cache so will reload from source.".format(cache_hash_file))
    else:
        # Read the stored hash inside a context manager so the handle is
        # closed before we may need to delete the file.
        with open(cache_hash_file, 'r') as f:
            cached_hash = f.read()
        if database_hash != cached_hash:
            logging.info("According to hash file, it looks like database cache is not valid. Will clear it and reload.")
            os.unlink(cache_hash_file)
            os.unlink(cache_pickle_file)
        else:
            logging.info("According to hash file, it looks like database cache is valid.")
            # Unpickling executes code embedded in the file, so the cache
            # in self.outputDirectory must be trusted local output.
            # The handle is closed before any attempt to unlink the file.
            with open(cache_pickle_file, 'rb') as f:
                database = cPickle.load(f)
            # Check the database from the pickle really does have the hash in the database.hash file.
            if database.hash == database_hash:
                logging.info("Database loaded from {0} has correct hash. Will use this cache.".format(cache_pickle_file))
                self.database = database
                rmgpy.data.rmg.database = database  # we need to store it in this module level variable too!
                return
            logging.info("Database loaded from {0} has INCORRECT hash. Will clear the cache and reload.".format(cache_pickle_file))
            os.unlink(cache_hash_file)
            os.unlink(cache_pickle_file)

    # No usable cache: load the thermo database from source.
    self.database = RMGDatabase()
    self.database.loadThermo(
        path=os.path.join(self.databaseDirectory, 'thermo'),
        thermoLibraries=self.thermoLibraries,
        depository=False,  # Don't bother loading the depository information, as we don't use it
    )

    self.database.hash = database_hash  # store the hash in the database so we can check it when it is next pickled.
    logging.info("Saving database cache in {0!r}".format(cache_pickle_file))
    self.database.saveToPickle(cache_pickle_file)
    with open(cache_hash_file, 'w') as f:
        f.write(database_hash)
378
+
379
+
309
380
310
381
def initialize (self , args ):
311
382
"""
0 commit comments