Skip to content

Commit fcb5c20

Browse files
committed
Load only thermo libraries
To reduce the database pickle size, thermoEstimator now only loads thermo libraries.
1 parent 4fadd44 commit fcb5c20

File tree

2 files changed

+76
-2
lines changed

2 files changed

+76
-2
lines changed

rmgpy/rmg/main.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,77 @@ def loadDatabase(self):
306306
with open(cache_hash_file,'w') as f:
307307
f.write(database_hash)
308308

309+
def loadThermoDatabase(self):
    """
    Load only the thermo part of the RMG database.

    The data is loaded from the ``thermo`` sub-directory of
    self.databaseDirectory, according to the setting in:

    * self.thermoLibraries

    No kinetics data (reaction libraries, seed mechanisms, kinetics families
    or depositories) is loaded here, and the thermo depository is skipped too,
    which keeps the pickled database small.

    If self.outputDirectory contains :file:`database.pkl` and :file:`database.hash` files then
    these are checked for validity and used as a cache. If the cache is missing,
    stale or unreadable it is discarded and the thermo database is reloaded from
    source, after which a fresh cache (pickle and hash) is saved.

    The cache file path and the expected hash are also published through
    ``scoop.shared.setConst`` as ``databaseFile`` and ``databaseHash``.

    On return, self.database holds the loaded database. When the database comes
    from a valid cache, ``rmgpy.data.rmg.database`` is set to it as well.
    """
    import inspect
    import hashlib
    import cPickle
    import rmgpy.utilities
    import scoop.shared

    # Make a hash of everything that could alter the contents of the database once it is fully loaded.
    # Then we can compare this hash to the cached file to see if the cache is valid.
    database_metadata = {
        'path': self.databaseDirectory,
        'database hash': rmgpy.utilities.path_checksum([self.databaseDirectory]),
        'thermoLibraries': self.thermoLibraries,
        'rmgpy.data source hash': rmgpy.data.getSourceHash(),
        'this source hash': hashlib.sha1(inspect.getsource(self.__class__)).hexdigest(),
    }
    database_hash = hashlib.sha1(cPickle.dumps(database_metadata)).hexdigest()
    # NOTE(review): these look like the same cache file names used by loadDatabase;
    # the differing metadata hash should keep the two caches from being confused - confirm.
    cache_hash_file = os.path.join(self.outputDirectory, 'database.hash')
    cache_pickle_file = os.path.join(self.outputDirectory, 'database.pkl')
    scoop.shared.setConst(databaseFile=cache_pickle_file, databaseHash=database_hash)

    if not os.path.exists(cache_pickle_file):
        logging.info("Couldn't find a database cache file {0!r} so will reload from source.".format(cache_pickle_file))
    elif not os.path.exists(cache_hash_file):
        logging.info("Couldn't find database cache hash file {0!r} to validate cache so will reload from source.".format(cache_hash_file))
    else:
        # Read the stored hash with a context manager so the handle is closed
        # before the file is (possibly) deleted below.
        with open(cache_hash_file, 'r') as f:
            cached_hash = f.read()
        if database_hash != cached_hash:
            logging.info("According to hash file, it looks like database cache is not valid. Will clear it and reload.")
            os.unlink(cache_hash_file)
            os.unlink(cache_pickle_file)
        else:
            logging.info("According to hash file, it looks like database cache is valid.")
            # SECURITY: unpickling executes arbitrary code from the file; this is only
            # safe as long as self.outputDirectory is not writable by untrusted users.
            try:
                with open(cache_pickle_file, 'rb') as f:
                    database = cPickle.load(f)
            except (cPickle.UnpicklingError, EOFError, AttributeError, ImportError, IndexError) as e:
                # A truncated or otherwise unreadable cache should not abort the run;
                # treat it like a cache with the wrong hash and rebuild it.
                logging.info("Could not read database cache {0!r} ({1}).".format(cache_pickle_file, e))
                database = None
            # Check the database from the pickle really does have the hash in the database.hash file.
            # getattr() guards against a failed load or a pickle that never stored a hash.
            if getattr(database, 'hash', None) == database_hash:
                logging.info("Database loaded from {0} has correct hash. Will use this cache.".format(cache_pickle_file))
                self.database = database
                rmgpy.data.rmg.database = database  # we need to store it in this module level variable too!
                return
            else:
                logging.info("Database loaded from {0} has INCORRECT hash. Will clear the cache and reload.".format(cache_pickle_file))
                os.unlink(cache_hash_file)
                os.unlink(cache_pickle_file)

    # No usable cache: load the thermo libraries from source.
    self.database = RMGDatabase()
    self.database.loadThermo(
        path = os.path.join(self.databaseDirectory, 'thermo'),
        thermoLibraries = self.thermoLibraries,
        depository = False,  # Don't bother loading the depository information, as we don't use it
    )

    self.database.hash = database_hash  # store the hash in the database so we can check it when it is next pickled.
    logging.info("Saving database cache in {0!r}".format(cache_pickle_file))
    self.database.saveToPickle(cache_pickle_file)
    with open(cache_hash_file, 'w') as f:
        f.write(database_hash)
378+
379+
309380

310381
def initialize(self, args):
311382
"""

thermoEstimator.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def runThermoEstimator(inputFile):
3232
rmg.loadThermoInput(inputFile)
3333

3434
# initialize and load the database as well as any QM settings
35-
rmg.loadDatabase()
35+
rmg.loadThermoDatabase()
3636
if rmg.quantumMechanics:
3737
logging.debug("Initialize QM")
3838
rmg.quantumMechanics.initialize()
@@ -60,9 +60,10 @@ def runThermoEstimator(inputFile):
6060
)
6161
output.write(writeThermoEntry(species))
6262
output.write('\n')
63+
library.save(os.path.join(rmg.outputDirectory,'ThermoLibrary.py'))
6364

6465
output.close()
65-
library.save(os.path.join(rmg.outputDirectory,'ThermoLibrary.py'))
66+
6667

6768

6869
################################################################################
@@ -87,6 +88,8 @@ def runThermoEstimator(inputFile):
8788
group = parser.add_mutually_exclusive_group()
8889
group.add_argument('-d', '--debug', action='store_true', help='print debug information')
8990
group.add_argument('-q', '--quiet', action='store_true', help='only print warnings and errors')
91+
92+
9093
args = parser.parse_args()
9194

9295
inputFile = os.path.abspath(args.input[0])

0 commit comments

Comments
 (0)