Skip to content

Commit 57fdfaa

Browse files
committed
Merge pull request #196 from nyee/rebaseDatabaseTest
Rebase database test
2 parents abf0c74 + bd42226 commit 57fdfaa

File tree

4 files changed

+312
-38
lines changed

4 files changed

+312
-38
lines changed

rmgpy/data/base.py

Lines changed: 124 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -786,7 +786,7 @@ def descendants(self, node):
786786
descendants.extend(self.descendants(child))
787787
return descendants
788788

789-
def isWellFormed(self):
789+
def checkWellFormed(self):
790790
"""
791791
Return :data:`True` if the database is well-formed. A well-formed
792792
database has an entry in the dictionary for every entry in the tree, and
@@ -797,47 +797,123 @@ def isWellFormed(self):
797797
nodes in the tree, if the tree is present; this is for databases with
798798
multiple trees, e.g. the kinetics databases.
799799
"""
800-
801-
wellFormed = True
800+
801+
802+
from rmgpy.data.kinetics.family import KineticsFamily
803+
804+
#list of nodes that are not wellFormed
805+
noGroup=[]
806+
noMatchingGroup={}
807+
notInTree=[]
808+
notSubgroup=[]
809+
probablyProduct=[]
810+
811+
# Give correct arguments for each type of database
812+
if isinstance(self, KineticsFamily):
813+
library=self.rules.entries
814+
groups=self.groups.entries
815+
treeIsPresent=True
816+
topNodes=self.getRootTemplate()
802817

803818
# Make list of all nodes in library
804-
libraryNodes = []
805-
for nodes in self.library:
806-
libraryNodes.extend(nodes.split(';'))
807-
libraryNodes = list(set(libraryNodes))
819+
libraryNodes=[]
820+
libraryNodesSplit = []
821+
for nodes in library:
822+
libraryNodes.append(nodes)
823+
libraryNodesSplit.extend(nodes.split(';'))
824+
libraryNodesSplit = list(set(libraryNodesSplit))
808825

826+
809827

810-
for node in libraryNodes:
828+
try:
829+
for node in libraryNodesSplit:
811830

812831
# All nodes in library must be in dictionary
813-
try:
814-
if node not in self.entries:
815-
raise DatabaseError('Node "{0}" in library is not present in dictionary.'.format(node))
816-
except DatabaseError, e:
817-
wellFormed = False
818-
logging.error(str(e))
819-
820-
# If a tree is present, all nodes in library should be in tree
821-
# (Technically the database is still well-formed, but let's warn
822-
# the user anyway
823-
if len(self.tree.parent) > 0:
824-
try:
825-
if node not in self.tree.parent:
826-
raise DatabaseError('Node "{0}" in library is not present in tree.'.format(node))
827-
except DatabaseError, e:
828-
logging.warning(str(e))
829-
830-
# If a tree is present, all nodes in tree must be in dictionary
831-
if self.tree is not None:
832-
for node in self.tree.parent:
833-
try:
834-
if node not in self.entries:
835-
raise DatabaseError('Node "{0}" in tree is not present in dictionary.'.format(node))
836-
except DatabaseError, e:
837-
wellFormed = False
838-
logging.error(str(e))
839-
840-
return wellFormed
832+
if node not in groups:
833+
noGroup.append(node)
834+
835+
#no point checking in tree if it doesn't even exist in groups
836+
for libraryNode in libraryNodes:
837+
nodes=libraryNode.split(';')
838+
for libraryEntry in library[libraryNode]:
839+
for node in nodes:
840+
for libraryGroup in libraryEntry.item.reactants:
841+
try:
842+
if groups[node].item.isIsomorphic(libraryGroup):
843+
break
844+
except AttributeError:
845+
if isinstance(groups[node].item, LogicOr) and isinstance(libraryGroup, LogicOr):
846+
if groups[node].item==libraryGroup:
847+
break
848+
except TypeError:
849+
print libraryGroup, type(libraryGroup)
850+
except KeyError:
851+
noGroup.append(node)
852+
else:
853+
noMatchingGroup[node]=libraryNode
854+
855+
if treeIsPresent:
856+
# All nodes need to be in the tree
857+
# This is true when ascending through parents leads to a top node
858+
for nodeName in groups:
859+
ascendParent=self.groups.entries[nodeName]
860+
861+
while ascendParent not in topNodes:
862+
child=ascendParent
863+
ascendParent=ascendParent.parent
864+
if ascendParent is None or child not in ascendParent.children:
865+
if child.index==-1:
866+
probablyProduct.append(child.label)
867+
break
868+
else:
869+
# If a group is not in a tree, we want to save the uppermost parent, not necessarily the original node
870+
notInTree.append(child.label)
871+
break
872+
#check if child is actually subgroup of parent
873+
ascendParent=self.groups.entries[nodeName].parent
874+
if ascendParent is not None:
875+
try:
876+
if not ascendParent.item.isSubgraphIsomorphic(self.groups.entries[nodeName].item):
877+
notSubgroup.append(nodeName)
878+
except AttributeError:
879+
if isinstance(groups[node].item, LogicOr) and isinstance(libraryGroup, LogicOr):
880+
if groups[node].item==libraryGroup:
881+
break
882+
except TypeError:
883+
print libraryGroup, type(libraryGroup)
884+
# The adj list of each node actually needs to be subset of its parent's adjlist
885+
#More to come later -nyee
886+
except DatabaseError, e:
887+
logging.error(str(e))
888+
889+
# # If a tree is present, all nodes in library should be in tree
890+
# # (Technically the database is still well-formed, but let's warn
891+
# # the user anyway
892+
# if len(self.tree.parent) > 0:
893+
# try:
894+
# if node not in self.tree.parent:
895+
# raise DatabaseError('Node "{0}" in library is not present in tree.'.format(node))
896+
# except DatabaseError, e:
897+
# logging.warning(str(e))
898+
#
899+
# # If a tree is present, all nodes in tree must be in dictionary
900+
# if self.tree is not None:
901+
# for node in self.tree.parent:
902+
# try:
903+
# if node not in self.entries:
904+
# raise DatabaseError('Node "{0}" in tree is not present in dictionary.'.format(node))
905+
# except DatabaseError, e:
906+
# wellFormed = False
907+
# logging.error(str(e))
908+
909+
# for libraryRule in library:
910+
#check the groups
911+
912+
#eliminate duplicates
913+
noGroup=list(set(noGroup))
914+
notInTree=list(set(notInTree))
915+
916+
return (noGroup, noMatchingGroup, notInTree, notSubgroup, probablyProduct)
841917

842918
def matchNodeToStructure(self, node, structure, atoms):
843919
"""
@@ -988,6 +1064,18 @@ def matchToStructure(self,database,structure,atoms):
9881064
return True != self.invert
9891065
return False != self.invert
9901066

1067+
def matchToLogicOr(self, other):
    """
    Return :data:`True` if `other` is the same LogicOr group as `self`.

    Two LogicOr groups are treated as the same when their ``components``
    lists contain the same entries the same number of times, in any order.
    Entries are compared with ``==``.
    """
    if len(self.components) != len(other.components):
        return False
    # Pair every component of self with a distinct, not-yet-used component of
    # other. A plain membership test would let a repeated component match the
    # same entry twice, so [A, A] would "match" [A, B] while [A, B] would not
    # match [A, A].
    unmatched = list(other.components)
    for node in self.components:
        try:
            unmatched.remove(node)
        except ValueError:
            return False
    return True
1078+
9911079
def getPossibleStructures(self, entries):
9921080
"""
9931081
Return a list of the possible structures below this node.

rmgpy/data/kinetics/family.py

Lines changed: 146 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from copy import copy, deepcopy
3939

4040
from rmgpy.data.base import Database, Entry, LogicNode, LogicOr, ForbiddenStructures,\
41-
ForbiddenStructureException, getAllCombinations
41+
ForbiddenStructureException, getAllCombinations, DatabaseError
4242
from rmgpy.reaction import Reaction
4343
from rmgpy.kinetics import Arrhenius, ArrheniusEP, ThirdBody, Lindemann, Troe, \
4444
PDepArrhenius, MultiArrhenius, MultiPDepArrhenius, \
@@ -1941,3 +1941,148 @@ def getRateCoefficientUnits(self):
19411941
return 's^-1'
19421942
else:
19431943
raise ValueError('Unable to determine units of rate coefficient for reaction family "{0}".'.format(self.label))
1944+
1945+
def checkWellFormed(self):
1946+
"""
1947+
Return a tuple of six collections of malformed database entries, in the
order (noGroup, noMatchingGroup, notInTree, notUnique, notSubgroup,
probablyProduct):
1948+
1949+
noGroup is a list of nodes in the rules that have no corresponding group in
1950+
groups.py
1951+
1952+
noMatchingGroup is a dictionary with entry labels from the rules as keys
1953+
and lists of entry labels from groups as values. These are groups where rules.py's
1954+
adj list does not match groups.py's.
1955+
1956+
notInTree is a list of groups that do not appear in the tree
notUnique is a dictionary with group labels as keys and lists of other
group labels as values. Each value lists the groups found to duplicate the
key (isIdentical for two Groups, matchToLogicOr for two LogicOrs).
1957+
1958+
notSubgroup is a dictionary with group labels as keys and parent labels
1959+
as values. Each key is a group that is not listed in the components of
1960+
its LogicOr parent, or whose item fails the isSubgraphIsomorphic check
1961+
against the parent's item. The value is the label of that parent.
1962+
1963+
probablyProduct is a list of groups which do not appear in the
1964+
tree, but are probably products (as opposed to reactants) which
1965+
are created in the database loading. These are not necessarily
1966+
malformations, but because I'm not certain where they came from,
1967+
I decided to list them.
1968+
"""
1969+
1970+
1971+
#Helper: append value to the list stored under key, creating the list if the key is new
1972+
def appendToDict(dictionary, key, value):
1973+
if key not in dictionary:
1974+
dictionary[key]=[value]
1975+
else:
1976+
dictionary[key].append(value)
1977+
return dictionary
1978+
1979+
#collections of nodes that are not wellFormed
1980+
noGroup=[]
1981+
noMatchingGroup={}
1982+
#collects rule label -> group labels with repeats; deduplicated into noMatchingGroup below
tempNoMatchingGroup={}
1983+
notInTree=[]
1984+
notUnique={}
1985+
notSubgroup={}
1986+
probablyProduct=[]
1987+
1988+
# Give correct arguments for each type of database
1989+
# if isinstance(self, KineticsFamily):
1990+
library=self.rules.entries
1991+
groups=self.groups.entries
1992+
#copy of groups that is shrunk during the uniqueness check, so groups itself is not modified while it is iterated
groupsCopy=copy(groups)
1993+
topNodes=self.getRootTemplate()
1994+
1995+
# Make list of all node names in library
1996+
libraryNodes=[]
1997+
for nodes in library:
1998+
libraryNodes.append(nodes)
1999+
2000+
try:
2001+
#Each label in rules.py should be in the form group1;group2;group3 etc
2002+
#and each group must appear in groups.py
2003+
for libraryNode in libraryNodes:
2004+
nodes=libraryNode.split(';')
2005+
for libraryEntry in library[libraryNode]:
2006+
for nodeName in nodes:
2007+
if nodeName not in groups:
2008+
noGroup.append(nodeName)
2009+
#If the node is not in the dictionary, we can't do the rest of the check
2010+
continue
2011+
#Each adj list in rules.py should match the adj list in groups.py
2012+
for libraryGroup in libraryEntry.item.reactants:
2013+
#break if we find a match between two groups
2014+
if isinstance(groups[nodeName].item, Group) and isinstance(libraryGroup, Group):
2015+
if groups[nodeName].item.isIsomorphic(libraryGroup):
2016+
break
2017+
#break if we find a match between two logic nodes
2018+
elif isinstance(groups[nodeName].item, LogicOr) and isinstance(libraryGroup, LogicOr):
2019+
if groups[nodeName].item.matchToLogicOr(libraryGroup):
2020+
break
2021+
#Otherwise no match is found, so we add it to the tempNoMatchingGroup
#NOTE(review): this reads as a for/else (runs when the loop above finishes without break) - indentation is not visible here, confirm
2022+
else:
2023+
tempNoMatchingGroup=appendToDict(tempNoMatchingGroup, libraryNode, nodeName)
2024+
#eliminate duplicates
2025+
for key, nodeList in tempNoMatchingGroup.iteritems():
2026+
noMatchingGroup[key]=list(set(nodeList))
2027+
2028+
# Each group in groups.py should appear in the tree
2029+
# This is true when ascending through parents leads to a top node
2030+
for nodeName in groups:
2031+
nodeGroup=self.groups.entries[nodeName]
2032+
nodeGroupItem=nodeGroup.item
2033+
ascendParent=nodeGroup
2034+
while ascendParent not in topNodes:
2035+
child=ascendParent
2036+
ascendParent=ascendParent.parent
2037+
if ascendParent is None or child not in ascendParent.children:
2038+
#entries with index -1 are reported separately as probable products instead of as missing from the tree
if child.index==-1:
2039+
probablyProduct.append(child.label)
2040+
break
2041+
else:
2042+
# If a group is not in a tree, we want to save the uppermost parent, not necessarily the original node
2043+
notInTree.append(child.label)
2044+
break
2045+
2046+
#each node should also be unique:
#drop the current node first so it is not compared with itself and each pair is only visited once
2047+
del groupsCopy[nodeName]
2048+
for nodeName2 in groupsCopy:
2049+
nodeGroup2Item=self.groups.entries[nodeName2].item
2050+
if isinstance(nodeGroup2Item, Group) and isinstance(nodeGroupItem, Group):
2051+
if nodeGroupItem.isIdentical(nodeGroup2Item):
2052+
notUnique=appendToDict(notUnique, nodeName, nodeName2)
2053+
if isinstance(nodeGroup2Item, LogicOr) and isinstance(nodeGroupItem, LogicOr):
2054+
if nodeGroupItem.matchToLogicOr(nodeGroup2Item):
2055+
notUnique=appendToDict(notUnique, nodeName, nodeName2)
2056+
2057+
#For a correct child-parent relationship, each atom in the parent should have a corresponding atom in the child.
2058+
nodeParent=nodeGroup.parent
2059+
#Atoms may be in a different order initially. Need to sort both child and parent first
2060+
#Don't need to do check for topNodes
2061+
if nodeParent is not None:
2062+
if isinstance(nodeParent.item, LogicOr):
2063+
if not nodeGroup.label in nodeParent.item.components:
2064+
#the child is not listed in its LogicOr parent, so record that parent's label
2065+
notSubgroup[nodeName]=nodeParent.label
2066+
continue
2067+
else:
2068+
#if the parent is a LogicOr, we want to keep ascending until we get to a group or hit a discontinuity (could be
2069+
#malformed tree or just ascending past the top node)
2070+
while isinstance(nodeParent.item, LogicOr):
2071+
nodeParent=nodeParent.parent
2072+
if nodeParent == None: break
2073+
if nodeParent == None: continue
2074+
# nodeParent.item.sortAtoms()
2075+
elif isinstance(nodeGroup.item, LogicOr):
2076+
print nodeGroup, ' is an intermediate LogicOr. See if it can be replaced with a adj list.'
2077+
continue
2078+
#If both the parent and child are graphs, we can use the function isSubgraphIsomorphic to check if it is actually a child
2079+
if not nodeGroup.item.isSubgraphIsomorphic(nodeParent.item):
2080+
notSubgroup[nodeName]=nodeParent.label
2081+
#NOTE(review): nothing in this method raises DatabaseError itself; presumably this guards callee errors - confirm
except DatabaseError, e:
2082+
logging.error(str(e))
2083+
2084+
#eliminate duplicates
2085+
noGroup=list(set(noGroup))
2086+
notInTree=list(set(notInTree))
2087+
2088+
return (noGroup, noMatchingGroup, notInTree, notUnique, notSubgroup, probablyProduct)

rmgpy/molecule/group.pxd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,3 +129,5 @@ cdef class Group(Graph):
129129
cpdef bint isSubgraphIsomorphic(self, Graph other, dict initialMap=?) except -2
130130

131131
cpdef list findSubgraphIsomorphisms(self, Graph other, dict initialMap=?)
132+
133+
# Declares isIdentical: takes another Graph and returns a C boolean (bint).
# NOTE(review): unlike isSubgraphIsomorphic above, no `except` value is declared here - confirm that is intended.
cpdef bint isIdentical(self, Graph other)

0 commit comments

Comments
 (0)