Skip to content

Commit f95a6a2

Browse files
authored
feat: add RCSR code to text description and return numpy array (#444)
1 parent eefc0e9 commit f95a6a2

File tree

1 file changed

+28
-5
lines changed

1 file changed

+28
-5
lines changed

src/mofdscribe/featurizers/text/mofdscriber.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from collections import Counter
55
from typing import Dict, Optional, Union
66

7+
import numpy as np
78
from moffragmentor import MOF as MOFFragmentorMOF # noqa: N811
89
from pymatgen.analysis.graphs import StructureGraph
910
from pymatgen.core import IStructure, Structure
@@ -39,6 +40,7 @@ def __init__(
3940
describer_kwargs: Optional[Dict] = None,
4041
incorporate_smiles: bool = True,
4142
describe_pores: bool = True,
43+
describe_rcsr: bool = True,
4244
) -> None:
4345
"""Construct an instance of the MOFDescriber.
4446
@@ -50,12 +52,15 @@ def __init__(
5052
incorporate_smiles (bool): If True, describe building blocks.
5153
describe_pores (bool): If True, add description of the geometry
5254
of the MOF pores.
55+
describe_rcsr (bool): If True, add RCSR code of the MOF
56+
topology.
5357
"""
5458
describer_defaults = {"describe_oxidation_states": False, "describe_bond_lengths": True}
5559
self.condenser_kwargs = condenser_kwargs or {}
5660
self.describer_kwargs = {**describer_defaults, **(describer_kwargs or {})}
5761
self.incorporate_smiles = incorporate_smiles
5862
self.describe_pores = describe_pores
63+
self.describe_rcsr = describe_rcsr
5964

6065
def _get_bb_description(self, structure: Structure, structure_graph: StructureGraph) -> str:
6166
moffragmentor_mof = MOFFragmentorMOF(structure, structure_graph)
@@ -65,7 +70,21 @@ def _get_bb_description(self, structure: Structure, structure_graph: StructureGr
6570

6671
linker_smiles = " ,".join("{} {}".format(v, k) for k, v in linker_counter.items())
6772
metal_smiles = " ,".join("{} {}".format(v, k) for k, v in metal_counter.items())
68-
return "Linkers: {}. Metal clusters: {}.".format(linker_smiles, metal_smiles)
73+
bb_string = "Linkers: {}. Metal clusters: {}. ".format(linker_smiles, metal_smiles)
74+
75+
rcsr_code = fragments.net_embedding.rcsr_code
76+
if rcsr_code and len(rcsr_code) > 1:
77+
rcsr_string = "RCSR code: {}. ".format(rcsr_code)
78+
79+
output_string = ""
80+
if self.incorporate_smiles:
81+
output_string += bb_string
82+
if self.describe_rcsr:
83+
output_string += rcsr_string
84+
85+
return output_string
86+
87+
return output_string
6988

7089
def _get_pore_description(self, structure):
7190
pore_featurizer = MOFMultipleFeaturizer(
@@ -87,11 +106,15 @@ def _get_robocrys_description(self, structure):
87106

88107
def _featurize(self, structure: Structure, structure_graph: StructureGraph):
89108
description = self._get_robocrys_description(structure)
90-
if self.incorporate_smiles:
91-
description += " " + self._get_bb_description(structure, structure_graph)
109+
if self.incorporate_smiles or self.describe_rcsr:
110+
if description[-1] != " ":
111+
description += " "
112+
description += self._get_bb_description(structure, structure_graph)
92113
if self.describe_pores:
93-
description += " " + self._get_pore_description(structure)
94-
return description
114+
if description[-1] != " ":
115+
description += " "
116+
description += self._get_pore_description(structure)
117+
return np.array([description])
95118

96119
def featurize(self, structure: Union[Structure, IStructure]):
97120
return self._featurize(structure, get_sg(structure))

0 commit comments

Comments
 (0)