From b716c4bea38849adb798259170b7fc27c90dd1fc Mon Sep 17 00:00:00 2001 From: Otto Kohulak Date: Fri, 3 Sep 2021 17:03:00 +0200 Subject: [PATCH 01/47] WIP --- aiida_gaussian_datatypes/libraries.py | 12 ++++++++++++ aiida_gaussian_datatypes/pseudopotential/cli.py | 11 +++++++++++ 2 files changed, 23 insertions(+) create mode 100644 aiida_gaussian_datatypes/libraries.py diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py new file mode 100644 index 0000000..75c8e16 --- /dev/null +++ b/aiida_gaussian_datatypes/libraries.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +# SPDX-License-Identifier: MIT + +from typing import Dict, Generic, List, Optional, Sequence, Type, TypeVar + +class _ExternalLibrary: + + def fetch(self): + pass + +class MitasLibrary(_ExternalLibrary): + pass diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py index f5f3cc8..dd30c47 100644 --- a/aiida_gaussian_datatypes/pseudopotential/cli.py +++ b/aiida_gaussian_datatypes/pseudopotential/cli.py @@ -223,3 +223,14 @@ def dump_pseudo(sym, name, tags, output_format, data): echo.echo_info("Dumping {}/{} ({})...".format(pseudo.name, pseudo.element, pseudo.uuid), err=True) writers[output_format](pseudo, sys.stdout) + +# fmt: off +@cli.command('install') +@click.argument('pseudopotential_library', type=click.Choice(,) +@decorators.with_dbenv() +# fmt: on +def install_family(pseudopotential_library): + """ + Installs a family of pseudo potentials from a remote repository + """ + pass From 9ba12152fc69852134be18ee5c75d4ef60a27b1c Mon Sep 17 00:00:00 2001 From: Otto Kohulak Date: Tue, 7 Sep 2021 18:41:19 +0200 Subject: [PATCH 02/47] WIP --- .../pseudopotential/data.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index 691e966..8c4212d 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -8,6 +8,7 @@ import dataclasses from decimal import Decimal +from icecream import ic from aiida.common.exceptions import ( MultipleObjectsError, @@ -352,6 +353,44 @@ def decimal2str(val): return [cls(**p) for p in pseudos] + @classmethod + def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False): + """ + Constructs a list with pseudopotential objects from a Pseudopotential in GAMESS format + + :param fhandle: open file handle + :param filters: a dict with attribute filter functions + :param duplicate_handling: how to handle duplicates ("ignore", "error", "new" (version)) + :param ignore_invalid: whether to ignore invalid entries silently + :rtype: list + """ + + if not filters: + filters = {} + + pseudos = [ + p + for p in ( + dataclasses.asdict(p, dict_factory=dict_fact) + for p in PseudopotentialData.datafile_iter(fhandle, keep_going=ignore_invalid) + ) + if matches_criteria(p) + ] + + if duplicate_handling == "ignore": # simply filter duplicates + pass + + elif duplicate_handling == "error": + pass + + elif duplicate_handling == "new": + pass + + else: + raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") + + return [] + def to_cp2k(self, fhandle): """ Write this Pseudopotential instance to a file in CP2K format. From 141261ca0a79c3429063c0f4f50e50a4ef4a0d36 Mon Sep 17 00:00:00 2001 From: Otto Kohulak Date: Wed, 8 Sep 2021 16:46:38 +0200 Subject: [PATCH 03/47] WIP --- .../pseudopotential/data.py | 100 ++++++++++++++---- setup.json | 3 +- 2 files changed, 81 insertions(+), 22 deletions(-) diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index 8c4212d..7077faa 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -32,9 +32,6 @@ def __init__( aliases=None, tags=None, n_el=None, - local=None, - non_local=None, - nlcc=None, version=1, **kwargs, ): @@ -57,18 +54,12 @@ def __init__( if not n_el: n_el = [] - if not non_local: - non_local = [] - - if not nlcc: - nlcc = [] - if "label" not in kwargs: kwargs["label"] = name super().__init__(**kwargs) - for attr in ("name", "element", "tags", "aliases", "n_el", "local", "non_local", "nlcc", "version"): + for attr in ("name", "element", "tags", "aliases", "n_el", "version"): self.set_attribute(attr, locals()[attr]) def store(self, *args, **kwargs): @@ -351,7 +342,7 @@ def decimal2str(val): else: raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") - return [cls(**p) for p in pseudos] + return [GTHPseudopotential(**p) for p in pseudos] @classmethod def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False): @@ -365,17 +356,12 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_ :rtype: list """ - if not filters: - filters = {} + for ii, line in enumerate(fhandle): + if len(line.strip()) == 0: continue + if ii == 0: + name, gen, core_electrons, number = line.split() + continue - pseudos = [ - p - for p in ( - dataclasses.asdict(p, dict_factory=dict_fact) - for p in PseudopotentialData.datafile_iter(fhandle, keep_going=ignore_invalid) - ) - if matches_criteria(p) - ] if duplicate_handling == "ignore": # simply filter duplicates pass @@ -416,6 +402,78 @@ def get_matching_basisset(self, *args, **kwargs): return BasisSet.get(element=self.element, *args, **kwargs) +class GTHPseudopotential(Pseudopotential): + + def __init__( + self, + local=None, + non_local=None, + nlcc=None, + **kwargs): + """ + :param element: string containing the name of the element + :param name: identifier for this basis set, usually something like -[-q] + :param aliases: alternative names + :param tags: additional tags + :param n_el: number of valence electrons covered by this basis set + :param local: see :py:attr:`~local` + :param local: see :py:attr:`~non_local` + """ + + if not non_local: + non_local = [] + + if not nlcc: + nlcc = [] + + super().__init__(**kwargs) + + for attr in ("local", "non_local", "nlcc"): + self.set_attribute(attr, locals()[attr]) + + @property + def local(self): + """ + Return the local part + + The format of the returned dictionary:: + + { + 'r': float, + 'coeffs': [float, float, ...], + } + + :rtype:dict + """ + return self.get_attribute("local", None) + + @property + def non_local(self): + """ + Return a list of non-local projectors (for l=0,1...). + + Each list element will have the following format:: + + { + 'r': float, + 'nproj': int, + 'coeffs': [float, float, ...], # only the upper-triangular elements + } + + :rtype:list + """ + return self.get_attribute("non_local", []) + + @property + def nlcc(self): + """ + Return a list of the non-local core-corrections data + + :rtype:list + """ + return self.get_attribute("nlcc", []) + + def _dict2pseudodata(data): from cp2k_input_tools.pseudopotentials import ( PseudopotentialData, diff --git a/setup.json b/setup.json index 381a559..7122cc7 100644 --- a/setup.json +++ b/setup.json @@ -22,7 +22,8 @@ "entry_points": { "aiida.data": [ "gaussian.basisset = aiida_gaussian_datatypes.basisset.data:BasisSet", - "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.data:Pseudopotential" + "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.data:Pseudopotential", + "gaussian.gthpseudo = aiida_gaussian_datatypes.pseudopotential.data:GTHPseudopotential" ], "aiida.cmdline.data": [ "gaussian.basisset = aiida_gaussian_datatypes.basisset.cli:cli", From 440796c6cf596b07c4fd52797d2a354686542f3f Mon Sep 17 00:00:00 2001 From: Otto Kohulak Date: Wed, 8 Sep 2021 19:09:57 +0200 Subject: [PATCH 04/47] WIP Gamess pseudo reader almost works --- .../pseudopotential/cli.py | 3 +- .../pseudopotential/data.py | 98 ++++++++++--------- setup.json | 3 +- 3 files changed, 56 insertions(+), 48 deletions(-) diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py index f5f3cc8..e0f4602 100644 --- a/aiida_gaussian_datatypes/pseudopotential/cli.py +++ b/aiida_gaussian_datatypes/pseudopotential/cli.py @@ -71,7 +71,7 @@ def cli(): help="filter by a tag (all tags must be present if specified multiple times)") @click.option( 'fformat', '-f', '--format', - type=click.Choice(['cp2k', ]), default='cp2k', + type=click.Choice(['cp2k', 'gamess' ]), default='cp2k', help="the format of the pseudopotential file") @click.option( '--duplicates', @@ -94,6 +94,7 @@ def import_pseudo(pseudopotential_file, fformat, sym, tags, duplicates, ignore_i loaders = { "cp2k": Pseudopotential.from_cp2k, + "gamess": Pseudopotential.from_gamess, } filters = { diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index 7077faa..c6d2bc6 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -41,8 +41,6 @@ def __init__( :param aliases: alternative names :param tags: additional tags :param n_el: number of valence electrons covered by this basis set - :param local: see :py:attr:`~local` - :param local: see :py:attr:`~non_local` """ if not aliases: @@ -85,7 +83,6 @@ def _validate(self): try: # directly raises a ValidationError for the pseudo data if something's amiss - _dict2pseudodata(self.attributes) assert isinstance(self.name, str) and self.name assert ( @@ -152,48 +149,6 @@ def n_el(self): return self.get_attribute("n_el", []) - @property - def local(self): - """ - Return the local part - - The format of the returned dictionary:: - - { - 'r': float, - 'coeffs': [float, float, ...], - } - - :rtype:dict - """ - return self.get_attribute("local", None) - - @property - def non_local(self): - """ - Return a list of non-local projectors (for l=0,1...). - - Each list element will have the following format:: - - { - 'r': float, - 'nproj': int, - 'coeffs': [float, float, ...], # only the upper-triangular elements - } - - :rtype:list - """ - return self.get_attribute("non_local", []) - - @property - def nlcc(self): - """ - Return a list of the non-local core-corrections data - - :rtype:list - """ - return self.get_attribute("nlcc", []) - @classmethod def get(cls, element, name=None, version="latest", match_aliases=True, group_label=None, n_el=None): """ @@ -356,11 +311,33 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_ :rtype: list """ + functions = [] + ns = 0 for ii, line in enumerate(fhandle): if len(line.strip()) == 0: continue if ii == 0: name, gen, core_electrons, number = line.split() continue + if ns == 0: + ns = int(line) + functions.append({"prefactors" : [], + "polynoms" : [], + "exponents" : []}) + else: + for key, value in zip(("prefactors", "polynoms", "exponents"), map(float, line.split())): + functions[-1][key].append(value) + ns -= 1 + + """ + TODO properly extract name + """ + element = name.split("-")[0] + + data = {"functions" : functions, + "element" : element, + "aliases" : [name], + "name" : name, + "n_el" : [1]} if duplicate_handling == "ignore": # simply filter duplicates @@ -375,7 +352,7 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_ else: raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") - return [] + return [ECPPseudopotential(**data)] def to_cp2k(self, fhandle): """ @@ -473,6 +450,35 @@ def nlcc(self): """ return self.get_attribute("nlcc", []) + def _validate(self): + super()._validate() + + try: + _dict2pseudodata(self.attributes) + except Exception as exc: + raise ValidationError("One or more invalid fields found") from exc + + +class ECPPseudopotential(Pseudopotential): + + def __init__( + self, + functions=None, + lmax=1, + **kwargs): + """ + :param functions: + :param lmax: maximum angular momentum + """ + + if not functions: + functions = [] + + super().__init__(**kwargs) + + for attr in ("functions", "lmax"): + self.set_attribute(attr, locals()[attr]) + def _dict2pseudodata(data): from cp2k_input_tools.pseudopotentials import ( diff --git a/setup.json b/setup.json index 7122cc7..c43e59d 100644 --- a/setup.json +++ b/setup.json @@ -23,7 +23,8 @@ "aiida.data": [ "gaussian.basisset = aiida_gaussian_datatypes.basisset.data:BasisSet", "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.data:Pseudopotential", - "gaussian.gthpseudo = aiida_gaussian_datatypes.pseudopotential.data:GTHPseudopotential" + "gaussian.gthpseudo = aiida_gaussian_datatypes.pseudopotential.data:GTHPseudopotential", + "gaussian.ecppseudo = aiida_gaussian_datatypes.pseudopotential.data:ECPPseudopotential" ], "aiida.cmdline.data": [ "gaussian.basisset = aiida_gaussian_datatypes.basisset.cli:cli", From a2ceabf94fee6c230da8b7053c666aa549b5da0a Mon Sep 17 00:00:00 2001 From: addman Date: Tue, 21 Sep 2021 11:35:48 +0200 Subject: [PATCH 05/47] WIP --- .../pseudopotential/cli.py | 4 +- .../pseudopotential/data.py | 77 +++++++++++++++++-- setup.json | 4 +- 3 files changed, 74 insertions(+), 11 deletions(-) diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py index e0f4602..a82d765 100644 --- a/aiida_gaussian_datatypes/pseudopotential/cli.py +++ b/aiida_gaussian_datatypes/pseudopotential/cli.py @@ -179,7 +179,8 @@ def list_pseudo(sym, name, tags): help="filter by name") @click.option('tags', '--tag', '-t', multiple=True, help="filter by a tag (all tags must be present if specified multiple times)") -@click.option('output_format', '-f', '--format', type=click.Choice(['cp2k', ]), default='cp2k', +@click.option('output_format', '-f', '--format', type=click.Choice(['cp2k', + 'gamess']), default='cp2k', help="Chose the output format for the pseudopotentials: " + ', '.join(['cp2k', ])) @decorators.with_dbenv() # fmt: on @@ -194,6 +195,7 @@ def dump_pseudo(sym, name, tags, output_format, data): writers = { "cp2k": Pseudopotential.to_cp2k, + "gamess": Pseudopotential.to_gamess, } if data: diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index c6d2bc6..ed4a9c5 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -316,7 +316,7 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_ for ii, line in enumerate(fhandle): if len(line.strip()) == 0: continue if ii == 0: - name, gen, core_electrons, number = line.split() + name, gen, core_electrons, lmax = line.split() continue if ns == 0: ns = int(line) @@ -328,15 +328,25 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_ functions[-1][key].append(value) ns -= 1 + """ + Cast polynoms to Integers + """ + functions[-1]["polynoms"] = [ int(x) for x in functions[-1]["polynoms"] ] + """ TODO properly extract name """ element = name.split("-")[0] + lmax = int(lmax) + core_electrons = int(core_electrons) + data = {"functions" : functions, "element" : element, "aliases" : [name], "name" : name, + "core_electrons" : core_electrons, + "lmax" : lmax, "n_el" : [1]} @@ -352,7 +362,7 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_ else: raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") - return [ECPPseudopotential(**data)] + return [SMPseudopotential(**data)] def to_cp2k(self, fhandle): """ @@ -361,10 +371,39 @@ def to_cp2k(self, fhandle): :param fhandle: open file handle """ - fhandle.write(f"# from AiiDA Pseudopotential\n") - for line in _dict2pseudodata(self.attributes).cp2k_format_line_iter(): - fhandle.write(line) - fhandle.write("\n") + if isinstance(self, GTHPseudopotential): + + fhandle.write(f"# from AiiDA Pseudopotential\n") + for line in _dict2pseudodata(self.attributes).cp2k_format_line_iter(): + fhandle.write(line) + fhandle.write("\n") + + else: + """ + make an error + """ + pass + + def to_gamess(self, fhandle): + """ + Write this Pseudopotential instance to a file in Gamess format. + + :param fhandle: open file handle + """ + + if isinstance(self, SMPseudopotential): + fhandle.write(f"{self.name} GEN {self.core_electrons} {self.lmax}\n") + for fun in self.functions: + fhandle.write(f"{len(fun)}\n") + for prefactor, polynom, exponent in zip(*[ fun[k] for k in ("prefactors", "polynoms", "exponents")]): + fhandle.write(f"{prefactor:10.7f} {polynom:d} {exponent:10.7f}\n") + + + else: + """ + make an error + """ + pass def get_matching_basisset(self, *args, **kwargs): """ @@ -459,12 +498,13 @@ def _validate(self): raise ValidationError("One or more invalid fields found") from exc -class ECPPseudopotential(Pseudopotential): +class SMPseudopotential(Pseudopotential): def __init__( self, functions=None, lmax=1, + core_electrons=0, **kwargs): """ :param functions: @@ -476,9 +516,30 @@ def __init__( super().__init__(**kwargs) - for attr in ("functions", "lmax"): + for attr in ("functions", "lmax", "core_electrons"): self.set_attribute(attr, locals()[attr]) + @property + def lmax(self): + """ + :rtype:int + """ + return self.get_attribute("lmax", []) + + @property + def core_electrons(self): + """ + :rtype:int + """ + return self.get_attribute("core_electrons", []) + + @property + def functions(self): + """ + :rtype:int + """ + return self.get_attribute("functions", []) + def _dict2pseudodata(data): from cp2k_input_tools.pseudopotentials import ( diff --git a/setup.json b/setup.json index c43e59d..7e98820 100644 --- a/setup.json +++ b/setup.json @@ -23,8 +23,8 @@ "aiida.data": [ "gaussian.basisset = aiida_gaussian_datatypes.basisset.data:BasisSet", "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.data:Pseudopotential", - "gaussian.gthpseudo = aiida_gaussian_datatypes.pseudopotential.data:GTHPseudopotential", - "gaussian.ecppseudo = aiida_gaussian_datatypes.pseudopotential.data:ECPPseudopotential" + "gaussian.pseudo.gthpseudopotential = aiida_gaussian_datatypes.pseudopotential.data:GTHPseudopotential", + "gaussian.pseudo.smpseudopotential = aiida_gaussian_datatypes.pseudopotential.data:SMPseudopotential" ], "aiida.cmdline.data": [ "gaussian.basisset = aiida_gaussian_datatypes.basisset.cli:cli", From 9e6089b120609f71fce657fca2b520b5455e36eb Mon Sep 17 00:00:00 2001 From: addman Date: Tue, 21 Sep 2021 11:55:49 +0200 Subject: [PATCH 06/47] Add PP type when dumping list if PPs --- aiida_gaussian_datatypes/pseudopotential/cli.py | 6 ++++-- aiida_gaussian_datatypes/pseudopotential/data.py | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py index a82d765..6ac0ad8 100644 --- a/aiida_gaussian_datatypes/pseudopotential/cli.py +++ b/aiida_gaussian_datatypes/pseudopotential/cli.py @@ -28,6 +28,7 @@ def _formatted_table_import(pseudos): def row(num, pseudo): return ( num + 1, + pseudo.__name__.replace("Pseudopotential", ""), pseudo.element, _names_column(pseudo.name, pseudo.aliases), ", ".join(pseudo.tags), @@ -36,7 +37,7 @@ def row(num, pseudo): ) table_content = [row(n, p) for n, p in enumerate(pseudos)] - return tabulate.tabulate(table_content, headers=["Nr.", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Version"]) + return tabulate.tabulate(table_content, headers=["Nr.", "Type", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Version"]) def _formatted_table_list(pseudos): @@ -45,6 +46,7 @@ def _formatted_table_list(pseudos): def row(pseudo): return ( pseudo.uuid, + pseudo.__name__.replace("Pseudopotential", ""), pseudo.element, _names_column(pseudo.name, pseudo.aliases), ", ".join(pseudo.tags), @@ -53,7 +55,7 @@ def row(pseudo): ) table_content = [row(p) for p in pseudos] - return tabulate.tabulate(table_content, headers=["ID", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Version"]) + return tabulate.tabulate(table_content, headers=["ID", "Type", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Version"]) @verdi_data.group("gaussian.pseudo") diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index ed4a9c5..ca29a89 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -25,6 +25,8 @@ class Pseudopotential(Data): fixme: extend to NLCC pseudos. """ + __name__ = "Pseudopotential" + def __init__( self, element=None, @@ -420,6 +422,8 @@ def get_matching_basisset(self, *args, **kwargs): class GTHPseudopotential(Pseudopotential): + __name__ = "GTHPseudopotential" + def __init__( self, local=None, @@ -500,6 +504,8 @@ def _validate(self): class SMPseudopotential(Pseudopotential): + __name__ = "SMPseudopotential" + def __init__( self, functions=None, From d6498efcf36d0132b31f0891aa4eb58e57840027 Mon Sep 17 00:00:00 2001 From: addman Date: Wed, 22 Sep 2021 14:33:14 +0200 Subject: [PATCH 07/47] Rename new PPs again. Minor changes. --- .../pseudopotential/cli.py | 4 +- .../pseudopotential/data.py | 37 +++++++++++-------- setup.json | 2 +- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py index 6ac0ad8..f538c3a 100644 --- a/aiida_gaussian_datatypes/pseudopotential/cli.py +++ b/aiida_gaussian_datatypes/pseudopotential/cli.py @@ -28,7 +28,7 @@ def _formatted_table_import(pseudos): def row(num, pseudo): return ( num + 1, - pseudo.__name__.replace("Pseudopotential", ""), + pseudo.__name__.replace("Pseudopotential", "") if hasattr(pseudo, "__name__") else "", pseudo.element, _names_column(pseudo.name, pseudo.aliases), ", ".join(pseudo.tags), @@ -46,7 +46,7 @@ def _formatted_table_list(pseudos): def row(pseudo): return ( pseudo.uuid, - pseudo.__name__.replace("Pseudopotential", ""), + pseudo.__name__.replace("Pseudopotential", "") if hasattr(pseudo, "__name__") else "", pseudo.element, _names_column(pseudo.name, pseudo.aliases), ", ".join(pseudo.tags), diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index ca29a89..9ebc7d4 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -313,6 +313,10 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_ :rtype: list """ + """ + Parser for Gamess format + """ + functions = [] ns = 0 for ii, line in enumerate(fhandle): @@ -343,13 +347,13 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_ core_electrons = int(core_electrons) - data = {"functions" : functions, - "element" : element, - "aliases" : [name], - "name" : name, + data = {"functions" : functions, + "element" : element, + "aliases" : [name], + "name" : name, "core_electrons" : core_electrons, - "lmax" : lmax, - "n_el" : [1]} + "lmax" : lmax, + "n_el" : None} if duplicate_handling == "ignore": # simply filter duplicates @@ -364,7 +368,7 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_ else: raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") - return [SMPseudopotential(**data)] + return [ECPPseudopotential(**data)] def to_cp2k(self, fhandle): """ @@ -393,7 +397,7 @@ def to_gamess(self, fhandle): :param fhandle: open file handle """ - if isinstance(self, SMPseudopotential): + if isinstance(self, ECPPseudopotential): fhandle.write(f"{self.name} GEN {self.core_electrons} {self.lmax}\n") for fun in self.functions: fhandle.write(f"{len(fun)}\n") @@ -431,11 +435,6 @@ def __init__( nlcc=None, **kwargs): """ - :param element: string containing the name of the element - :param name: identifier for this basis set, usually something like -[-q] - :param aliases: alternative names - :param tags: additional tags - :param n_el: number of valence electrons covered by this basis set :param local: see :py:attr:`~local` :param local: see :py:attr:`~non_local` """ @@ -502,9 +501,9 @@ def _validate(self): raise ValidationError("One or more invalid fields found") from exc -class SMPseudopotential(Pseudopotential): +class ECPPseudopotential(Pseudopotential): - __name__ = "SMPseudopotential" + __name__ = "ECPPseudopotential" def __init__( self, @@ -528,6 +527,8 @@ def __init__( @property def lmax(self): """ + Return maximum angular momentum + :rtype:int """ return self.get_attribute("lmax", []) @@ -535,6 +536,8 @@ def lmax(self): @property def core_electrons(self): """ + Returns number of core electrons + :rtype:int """ return self.get_attribute("core_electrons", []) @@ -542,7 +545,9 @@ def core_electrons(self): @property def functions(self): """ - :rtype:int + Returns list of basis functions + + :rtype:list """ return self.get_attribute("functions", []) diff --git a/setup.json b/setup.json index 7e98820..6d59c57 100644 --- a/setup.json +++ b/setup.json @@ -24,7 +24,7 @@ "gaussian.basisset = aiida_gaussian_datatypes.basisset.data:BasisSet", "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.data:Pseudopotential", "gaussian.pseudo.gthpseudopotential = aiida_gaussian_datatypes.pseudopotential.data:GTHPseudopotential", - "gaussian.pseudo.smpseudopotential = aiida_gaussian_datatypes.pseudopotential.data:SMPseudopotential" + "gaussian.pseudo.ecppseudopotential = aiida_gaussian_datatypes.pseudopotential.data:ECPPseudopotential" ], "aiida.cmdline.data": [ "gaussian.basisset = aiida_gaussian_datatypes.basisset.cli:cli", From 4691b81355960d797fa6d4deb872fef4c607ce25 Mon Sep 17 00:00:00 2001 From: addman Date: Wed, 22 Sep 2021 14:33:58 +0200 Subject: [PATCH 08/47] Fix corrupted tests. For strange reasons retrieved PPs were in different order. I do not consider this as an error, therefore, I changed the assert comparission of maps of element-lists to maps of element-sets. Also one validation test had to be changed. --- tests/test_group.py | 10 ++++++---- tests/test_pseudo_data.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/test_group.py b/tests/test_group.py index 5aceea9..afb7ab2 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -33,10 +33,11 @@ def test_pseudopotential_group_get(): pseudogroup.add_nodes([pseudo.store() for pseudo in pseudos]) retrieved_pseudos = pseudogroup.get_pseudos(elements=["Li", "H"]) + retrieved_pseudos = {key: {x for x in val} for key, val in retrieved_pseudos.items()} assert retrieved_pseudos == { - "Li": [p for p in pseudos if p.element == "Li"], - "H": [p for p in pseudos if p.element == "H"], + "Li": {p for p in pseudos if p.element == "Li"}, + "H": {p for p in pseudos if p.element == "H"}, } @@ -66,8 +67,9 @@ def test_pseudopotential_group_get_structure(): structure.append_atom(position=(0.500, 0.500, 0.500), symbols="H") retrieved_pseudos = pseudogroup.get_pseudos(structure=structure) + retrieved_pseudos = {key: {x for x in val} for key, val in retrieved_pseudos.items()} assert retrieved_pseudos == { - "Li": [p for p in pseudos if p.element == "Li"], - "H": [p for p in pseudos if p.element == "H"], + "Li": {p for p in pseudos if p.element == "Li"}, + "H": {p for p in pseudos if p.element == "H"}, } diff --git a/tests/test_pseudo_data.py b/tests/test_pseudo_data.py index 43d0b72..d358911 100644 --- a/tests/test_pseudo_data.py +++ b/tests/test_pseudo_data.py @@ -79,7 +79,7 @@ def test_validation_empty(): def test_validation_invalid_local(): - Pseudo = DataFactory("gaussian.pseudo") + Pseudo = DataFactory("gaussian.pseudo.gthpseudopotential") pseudo = Pseudo(name="test", element="H", local={"r": 1.23, "coeffs": [], "something": "else"}) with pytest.raises(ValidationError): From dbb3f595e3feecb4b148bb20e559f6f9efec76eb Mon Sep 17 00:00:00 2001 From: addman Date: Wed, 22 Sep 2021 16:14:26 +0200 Subject: [PATCH 09/47] Fix small error in Pseudopotenial.get, giving support for other types of PPs. --- aiida_gaussian_datatypes/pseudopotential/data.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index 9ebc7d4..05fe900 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -171,7 +171,7 @@ def get(cls, element, name=None, version="latest", match_aliases=True, group_lab query.append(Group, filters={"label": group_label}, tag="group") params["with_group"] = "group" - query.append(Pseudopotential, **params) + query.append(cls, **params) filters = {"attributes.element": {"==": element}} @@ -184,7 +184,7 @@ def get(cls, element, name=None, version="latest", match_aliases=True, group_lab else: filters["attributes.name"] = {"==": name} - query.add_filter(Pseudopotential, filters) + query.add_filter(cls, filters) # SQLA ORM only solution: # query.order_by({Pseudopotential: [{"attributes.version": {"cast": "i", "order": "desc"}}]}) @@ -355,7 +355,6 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_ "lmax" : lmax, "n_el" : None} - if duplicate_handling == "ignore": # simply filter duplicates pass From 0fa709011018a7a72ca8734e963ae894e7581b04 Mon Sep 17 00:00:00 2001 From: addman Date: Wed, 22 Sep 2021 16:29:21 +0200 Subject: [PATCH 10/47] Add duplicate handling for ECPs --- .../pseudopotential/data.py | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index 05fe900..c48bf6c 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -313,6 +313,14 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_ :rtype: list """ + def exists(pseudo): + try: + cls.get(pseudo["element"], pseudo["name"], match_aliases=False) + except NotExistent: + return False + + return True + """ Parser for Gamess format """ @@ -353,16 +361,24 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_ "name" : name, "core_electrons" : core_electrons, "lmax" : lmax, + "version" : 1, "n_el" : None} if duplicate_handling == "ignore": # simply filter duplicates - pass + if exists(data): + return [] elif duplicate_handling == "error": - pass + if exists(data): + raise UniquenessError( + f"Gaussian Pseudopotential already exists for" + f" element={data['element']}, name={data['name']}: {latest.uuid}" + ) elif duplicate_handling == "new": - pass + if exists(data): + latest = cls.get(data["element"], data["name"], match_aliases=False) + data["version"] = latest.version + 1 else: raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") From 2a5d0031fcd162f936c2ed27d995e226be7380dc Mon Sep 17 00:00:00 2001 From: addman Date: Wed, 22 Sep 2021 16:58:58 +0200 Subject: [PATCH 11/47] Add first from_gamess tester --- tests/GAMESS_ECP.B | 7 +++++++ tests/test_pseudo_data.py | 15 +++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/GAMESS_ECP.B diff --git a/tests/GAMESS_ECP.B b/tests/GAMESS_ECP.B new file mode 100644 index 0000000..9ab1f03 --- /dev/null +++ b/tests/GAMESS_ECP.B @@ -0,0 +1,7 @@ +B-ccECP GEN 2 1 +3 + 1.00000 1 30.0000 + 100.000 3 22.0000 +-1.00000 2 5.0000 +1 + 20.0000 2 4.0000 diff --git a/tests/test_pseudo_data.py b/tests/test_pseudo_data.py index d358911..ddad6e4 100644 --- a/tests/test_pseudo_data.py +++ b/tests/test_pseudo_data.py @@ -94,3 +94,18 @@ def test_get_matching_empty(): with pytest.raises(NotExistent): pseudos[0].get_matching_basisset() + + +def test_import_from_gamess(): + Pseudopotential = DataFactory("gaussian.pseudo") + + with open(TEST_DIR.joinpath("GAMESS_ECP.B"), "r") as fhandle: + # get only the He PADE pseudo + pseudos = Pseudopotential.from_gamess( fhandle ) + + assert len(pseudos) == 1 + + pseudos[0].store() + + # check that the name is used for the node label + assert pseudos[0].label == pseudos[0].name From e156d63ac3a9dc03c58b7554678bbb300f6ef80b Mon Sep 17 00:00:00 2001 From: addman Date: Thu, 23 Sep 2021 17:53:56 +0200 Subject: [PATCH 12/47] WIP --- aiida_gaussian_datatypes/libraries.py | 60 ++++++++++++++++++- .../pseudopotential/cli.py | 11 ---- setup.json | 3 +- 3 files changed, 60 insertions(+), 14 deletions(-) diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py index 75c8e16..482097a 100644 --- a/aiida_gaussian_datatypes/libraries.py +++ b/aiida_gaussian_datatypes/libraries.py @@ -1,12 +1,68 @@ # -*- coding: utf-8 -*- # SPDX-License-Identifier: MIT +# +# Was there really a fish +# That grants you that kind of wish +# +import os +import re +import git +import tempfile +import pathlib from typing import Dict, Generic, List, Optional, Sequence, Type, TypeVar +from icecream import ic + +class LibraryBookKeeper: + + classes = [] + + @classmethod + def register_library(cls, cls_): + cls.classes.append(cls_) + + @classmethod + def get_libraries(cls): + return cls.classes + + @classmethod + def get_library_names(cls): + return [ re.match("", str(x)).group(1) for x in cls.classes ] + + @classmethod + def get_library_by_name(cls, name): + for cls_ in cls.get_libraries(): + if re.match(f"", str(cls_)) is not None: + return cls_ + return None class _ExternalLibrary: - def fetch(self): + @classmethod + def fetch(cls): pass -class MitasLibrary(_ExternalLibrary): +@LibraryBookKeeper.register_library +class EmptyLibrary(_ExternalLibrary): pass + +@LibraryBookKeeper.register_library +class MitasLibrary(_ExternalLibrary): + + _URL = "https://github.com/QMCPACK/pseudopotentiallibrary.git" + + @classmethod + def fetch(cls): + tempdir = pathlib.Path(tempfile.mkdtemp()) + git.Repo.clone_from(cls._URL, tempdir) + elements = { str(sub.name): {"file" : sub} for sub in (tempdir/"recipes").iterdir() if sub.is_dir() } + # Add types + elements = {el: {**data, + "types" : {x.name: {"path": x, + "basis": [ b for b in x.iterdir() if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.gamess", b.name)], + "pseudo": [ b for b in x.iterdir() if re.match("[A-z]{1,2}\.ccECP\.gamess", b.name)]} for x in data["file"].iterdir() if x.is_dir()}} for el, data in elements.items()} + return elements + + + + diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py index 94be183..f538c3a 100644 --- a/aiida_gaussian_datatypes/pseudopotential/cli.py +++ b/aiida_gaussian_datatypes/pseudopotential/cli.py @@ -228,14 +228,3 @@ def dump_pseudo(sym, name, tags, output_format, data): echo.echo_info("Dumping {}/{} ({})...".format(pseudo.name, pseudo.element, pseudo.uuid), err=True) writers[output_format](pseudo, sys.stdout) - -# fmt: off -@cli.command('install') -@click.argument('pseudopotential_library', type=click.Choice(,) -@decorators.with_dbenv() -# fmt: on -def install_family(pseudopotential_library): - """ - Installs a family of pseudo potentials from a remote repository - """ - pass diff --git a/setup.json b/setup.json index 6d59c57..6a20664 100644 --- a/setup.json +++ b/setup.json @@ -28,7 +28,8 @@ ], "aiida.cmdline.data": [ "gaussian.basisset = aiida_gaussian_datatypes.basisset.cli:cli", - "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.cli:cli" + "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.cli:cli", + "gaussian = aiida_gaussian_datatypes.fetcher.cli:cli" ], "aiida.groups": [ "gaussian.basisset = aiida_gaussian_datatypes.groups:BasisSetGroup", From cdcbfddfe906f27bd780d407ea3c860e0b9a8a37 Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 24 Sep 2021 12:32:44 +0200 Subject: [PATCH 13/47] Get rid of the ridiculous lineliner in libraries.py --- aiida_gaussian_datatypes/libraries.py | 51 +++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 6 deletions(-) diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py index 482097a..ec50ae6 100644 --- a/aiida_gaussian_datatypes/libraries.py +++ b/aiida_gaussian_datatypes/libraries.py @@ -10,6 +10,7 @@ import git import tempfile import pathlib +from aiida_gaussian_datatypes import utils from typing import Dict, Generic, List, Optional, Sequence, Type, TypeVar from icecream import ic @@ -53,15 +54,53 @@ class MitasLibrary(_ExternalLibrary): @classmethod def fetch(cls): + + elements = {} + def add_row(p, elements = elements): + element = str(p.parent.parent.name) + if element not in utils.SYM2NUM: # Check if element is valid + return + element_path = p.parent.parent + + typ = str(p.parent.name) + typ_path = str(p.parent.name) + + if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.gamess", p.name): + nature = "basis" + elif re.match("[A-z]{1,2}\.ccECP\.gamess", p.name): + nature = "pseudos" + else: + """ + If does not match these regexes do nothing + """ + return + + if element not in elements: + elements[element] = {"path": element_path, + "types": {}} + + if typ not in elements[element]["types"]: + elements[element]["types"][typ] = {"path": typ_path, + "basis": [], + "pseudos": []} + + elements[element]["types"][typ][nature].append(p) + + tempdir = pathlib.Path(tempfile.mkdtemp()) git.Repo.clone_from(cls._URL, tempdir) - elements = { str(sub.name): {"file" : sub} for sub in (tempdir/"recipes").iterdir() if sub.is_dir() } - # Add types - elements = {el: {**data, - "types" : {x.name: {"path": x, - "basis": [ b for b in x.iterdir() if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.gamess", b.name)], - "pseudo": [ b for b in x.iterdir() if re.match("[A-z]{1,2}\.ccECP\.gamess", b.name)]} for x in data["file"].iterdir() if x.is_dir()}} for el, data in elements.items()} + + for p in (tempdir/"recipes").glob("**/*"): + if str(p.name).lower().endswith(".gamess"): + add_row(p) + return elements +# elements = {el: {**data, +#- "types" : {x.name: {"path": x, +#- "basis": [ b for b in x.iterdir() if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.gamess", b.name)], +#- "pseudo": [ b for b in x.iterdir() if re.match(, b.name)]} for x in data["file"].iterdir() if x.is_dir()}} for el, data in elements.items()} +#- return elements + From 63425d08db90bc027a19952cbf138f553073e120 Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 24 Sep 2021 13:42:11 +0200 Subject: [PATCH 14/47] Add different formater for import tables --- aiida_gaussian_datatypes/fetcher/cli.py | 89 +++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 aiida_gaussian_datatypes/fetcher/cli.py diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py new file mode 100644 index 0000000..07c62bc --- /dev/null +++ b/aiida_gaussian_datatypes/fetcher/cli.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- + +import click +import tabulate +from aiida.cmdline.utils import decorators, echo +from aiida.cmdline.commands.cmd_data import verdi_data +from ..libraries import * + +def _formatted_table_import(elements): + """generates a formatted table (using tabulate) for importable basis and PPs""" + + def _boldformater(f): + + def fout(*args, **kwargs): + if args[1] % 2 == 1: + return ( f"\033[1m{x}\033[0m" for x in f(*args, **kwargs)) + else: + return ( x for x in f(*args, **kwargs)) + return fout + + class row(): + + num = [] + element = [] + t = [] + + @_boldformater + def __new__(cls, num, element, t, p, b): + + if element in cls.element: + element = "" + else: + cls.element.append(element) + element = str(element) + cls.t = [] + + if num in cls.num: + num = "" + else: + cls.num.append(num) + num = str(num) + + if t in cls.t: + t = "" + else: + cls.t.append(t) + + if t == "": + p = "" + + return ( + num, + element, + t, + p, + b + ) + + table_content = [] + for ii, (e, d) in enumerate(elements.items()): + for t in d["types"]: + if len(d["types"][t]["pseudos"]) == 0: + continue + p = d["types"][t]["pseudos"][0] + for b in d["types"][t]["basis"]: + table_content.append(row(ii, e, t, p.name, b.name)) + + #table_content = [row(n, p, v) for n, (p, v) in enumerate(elements.items())] + return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "Pseudo", "Basis"]) + +@verdi_data.group("gaussian") +def cli(): + """Manage Pseudopotentials for GTO-based codes""" + +# fmt: off +@cli.command('fetch') +@click.argument('library', + type=click.Choice(LibraryBookKeeper.get_library_names())) +@decorators.with_dbenv() +# fmt: on +def install_family(library): + """ + Installs a family of pseudo potentials from a remote repository + """ + elements = LibraryBookKeeper.get_library_by_name(library).fetch() + echo.echo_info(f"Found {len(elements)} elements") + echo.echo(_formatted_table_import(elements)) + + From 76f9358259ba2f8ef97eccb64615f02fea75b05e Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 24 Sep 2021 14:23:04 +0200 Subject: [PATCH 15/47] WIP --- aiida_gaussian_datatypes/basisset/cli.py | 4 +- aiida_gaussian_datatypes/basisset/data.py | 53 ++++++++++++++++++++++- 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/aiida_gaussian_datatypes/basisset/cli.py b/aiida_gaussian_datatypes/basisset/cli.py index 3615ef7..45fa3d2 100644 --- a/aiida_gaussian_datatypes/basisset/cli.py +++ b/aiida_gaussian_datatypes/basisset/cli.py @@ -70,7 +70,8 @@ def cli(): multiple=True, help="filter by a tag (all tags must be present if specified multiple times)") @click.option( - 'fformat', '-f', '--format', type=click.Choice(['cp2k']), default='cp2k', + 'fformat', '-f', '--format', type=click.Choice(['cp2k', + 'gamess']), default='cp2k', help="the format of the basis set file") @click.option( '--duplicates', @@ -89,6 +90,7 @@ def import_basisset(basisset_file, fformat, sym, tags, duplicates, group): loaders = { "cp2k": BasisSet.from_cp2k, + "gamess": BasisSet.from_gamess, } filters = { diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py index 72c9622..7ea263b 100644 --- a/aiida_gaussian_datatypes/basisset/data.py +++ b/aiida_gaussian_datatypes/basisset/data.py @@ -236,7 +236,7 @@ def get(cls, element, name=None, version="latest", match_aliases=True, group_lab return items[0][0] @classmethod - def from_cp2k(cls, fhandle, filters=None, duplicate_handling="ignore"): + def from_cp2k(cls, fhandle, filters=None, duplicate_handling="ignore", element = None): """ Constructs a list with basis set objects from a Basis Set in CP2K format @@ -324,6 +324,57 @@ def decimal2str(val): return [cls(**bs) for bs in bsets] + @classmethod + def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", element = None): + """ + Constructs a list with basis set objects from a Basis Set in GAMESS format + + :param fhandle: open file handle + :param filters: a dict with attribute filter functions + :param duplicate_handling: how to handle duplicates ("ignore", "error", "new" (version)) + :rtype: list + """ + + if not element: + raise ValueError(f"Element has to be set!") + + + """ + GAMESS parser + """ + + if duplicate_handling == "ignore": # simply filter duplicates + #bsets = [bs for bs in bsets if not exists(bs)] + pass + + elif duplicate_handling == "error": + #for bset in bsets: + # try: + # latest = cls.get(bset["element"], bset["name"], match_aliases=False) + # except NotExistent: + # pass + # else: + # raise UniquenessError( + # f"Gaussian Basis Set already exists for" + # f" element={bset['element']}, name={bset['name']}: {latest.uuid}" + # ) + pass + + elif duplicate_handling == "new": + #for bset in bsets: + # try: + # latest = cls.get(bset["element"], bset["name"], match_aliases=False) + # except NotExistent: + # pass + # else: + # bset["version"] = latest.version + 1 + pass + + else: + raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") + + return [] + def to_cp2k(self, fhandle): """ Write the Basis Set to the passed file handle in the format expected by CP2K. From f3ee4c0bec157397aaea98d56b857517d81eba78 Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 24 Sep 2021 15:18:10 +0200 Subject: [PATCH 16/47] Change importter to import basis from nwchem. Problem with gamess format is it does not stores the element symbol. --- aiida_gaussian_datatypes/fetcher/cli.py | 3 ++- aiida_gaussian_datatypes/libraries.py | 13 ++----------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py index 07c62bc..dfc717b 100644 --- a/aiida_gaussian_datatypes/fetcher/cli.py +++ b/aiida_gaussian_datatypes/fetcher/cli.py @@ -53,6 +53,7 @@ def __new__(cls, num, element, t, p, b): element, t, p, + re.match("[A-z]{1,2}\.(.+).nwchem", b).group(1), b ) @@ -66,7 +67,7 @@ def __new__(cls, num, element, t, p, b): table_content.append(row(ii, e, t, p.name, b.name)) #table_content = [row(n, p, v) for n, (p, v) in enumerate(elements.items())] - return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "Pseudo", "Basis"]) + return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "PseudoFile", "Basis", "BasisFile"]) @verdi_data.group("gaussian") def cli(): diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py index ec50ae6..039ee43 100644 --- a/aiida_gaussian_datatypes/libraries.py +++ b/aiida_gaussian_datatypes/libraries.py @@ -65,7 +65,7 @@ def add_row(p, elements = elements): typ = str(p.parent.name) typ_path = str(p.parent.name) - if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.gamess", p.name): + if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.nwchem", p.name): nature = "basis" elif re.match("[A-z]{1,2}\.ccECP\.gamess", p.name): nature = "pseudos" @@ -91,17 +91,8 @@ def add_row(p, elements = elements): git.Repo.clone_from(cls._URL, tempdir) for p in (tempdir/"recipes").glob("**/*"): - if str(p.name).lower().endswith(".gamess"): + if str(p.name).lower().endswith(".gamess") or str(p.name).lower().endswith(".nwchem"): add_row(p) return elements -# elements = {el: {**data, -#- "types" : {x.name: {"path": x, -#- "basis": [ b for b in x.iterdir() if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.gamess", b.name)], -#- "pseudo": [ b for b in x.iterdir() if re.match(, b.name)]} for x in data["file"].iterdir() if x.is_dir()}} for el, data in elements.items()} -#- return elements - - - - From 04cffdfc51f1c331c85912db9cc4044aea758267 Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 24 Sep 2021 18:02:19 +0200 Subject: [PATCH 17/47] WIP --- aiida_gaussian_datatypes/basisset/cli.py | 4 ++-- aiida_gaussian_datatypes/basisset/data.py | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/aiida_gaussian_datatypes/basisset/cli.py b/aiida_gaussian_datatypes/basisset/cli.py index 45fa3d2..7d1c91c 100644 --- a/aiida_gaussian_datatypes/basisset/cli.py +++ b/aiida_gaussian_datatypes/basisset/cli.py @@ -71,7 +71,7 @@ def cli(): help="filter by a tag (all tags must be present if specified multiple times)") @click.option( 'fformat', '-f', '--format', type=click.Choice(['cp2k', - 'gamess']), default='cp2k', + 'nwchem']), default='cp2k', help="the format of the basis set file") @click.option( '--duplicates', @@ -90,7 +90,7 @@ def import_basisset(basisset_file, fformat, sym, tags, duplicates, group): loaders = { "cp2k": BasisSet.from_cp2k, - "gamess": BasisSet.from_gamess, + "nwchem": BasisSet.from_nwchem, } filters = { diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py index 7ea263b..9b907b6 100644 --- a/aiida_gaussian_datatypes/basisset/data.py +++ b/aiida_gaussian_datatypes/basisset/data.py @@ -325,9 +325,9 @@ def decimal2str(val): return [cls(**bs) for bs in bsets] @classmethod - def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", element = None): + def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", element = None): """ - Constructs a list with basis set objects from a Basis Set in GAMESS format + Constructs a list with basis set objects from a Basis Set in NWCHEM format :param fhandle: open file handle :param filters: a dict with attribute filter functions @@ -340,8 +340,10 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", element """ - GAMESS parser + NWCHEM parser """ + for line in fhande: + if duplicate_handling == "ignore": # simply filter duplicates #bsets = [bs for bs in bsets if not exists(bs)] From 506664f47f13b8cdcfff7e3a096f05455e919af5 Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 24 Sep 2021 18:03:23 +0200 Subject: [PATCH 18/47] Forgot to add __init__.py --- aiida_gaussian_datatypes/fetcher/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 aiida_gaussian_datatypes/fetcher/__init__.py diff --git a/aiida_gaussian_datatypes/fetcher/__init__.py b/aiida_gaussian_datatypes/fetcher/__init__.py new file mode 100644 index 0000000..e69de29 From 188fe1de416ecee71e9a00f2d786ec6cd2b9820b Mon Sep 17 00:00:00 2001 From: addman Date: Sun, 26 Sep 2021 00:15:49 +0200 Subject: [PATCH 19/47] Add nwchem loader for basis --- aiida_gaussian_datatypes/basisset/data.py | 70 +++++++++++++++++++---- 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py index 9b907b6..9f87a6e 100644 --- a/aiida_gaussian_datatypes/basisset/data.py +++ b/aiida_gaussian_datatypes/basisset/data.py @@ -14,8 +14,11 @@ NotExistent, UniquenessError, ValidationError, + ParsingError ) +import re from aiida.orm import Data, Group +from icecream import ic class BasisSet(Data): @@ -82,14 +85,15 @@ def _validate(self): # directly raises an exception for the data if something's amiss, extra fields are ignored BasisSetData.from_dict({"identifiers": self.aliases, **self.attributes}) - assert isinstance(self.name, str) and self.name + #assert isinstance(self.name, str) and self.name + ic(self.aliases) assert ( isinstance(self.aliases, list) and all(isinstance(alias, str) for alias in self.aliases) and self.aliases ) - assert isinstance(self.tags, list) and all(isinstance(tag, str) for tag in self.tags) - assert isinstance(self.version, int) and self.version > 0 + #assert isinstance(self.tags, list) and all(isinstance(tag, str) for tag in self.tags) + #assert isinstance(self.version, int) and self.version > 0 except Exception as exc: raise ValidationError("One or more invalid fields found") from exc @@ -236,7 +240,7 @@ def get(cls, element, name=None, version="latest", match_aliases=True, group_lab return items[0][0] @classmethod - def from_cp2k(cls, fhandle, filters=None, duplicate_handling="ignore", element = None): + def from_cp2k(cls, fhandle, filters=None, duplicate_handling="ignore"): """ Constructs a list with basis set objects from a Basis Set in CP2K format @@ -325,7 +329,7 @@ def decimal2str(val): return [cls(**bs) for bs in bsets] @classmethod - def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", element = None): + def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore"): """ Constructs a list with basis set objects from a Basis Set in NWCHEM format @@ -335,15 +339,51 @@ def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", element :rtype: list """ - if not element: - raise ValueError(f"Element has to be set!") - """ NWCHEM parser """ - for line in fhande: + element = None + data = [] + blocks = [] + + def block_creator(b, orb, blocks = blocks): + orb_dict = {"s" : 0, + "p" : 1, + "d" : 2, + "f" : 3, + "g" : 4, + "h" : 5, + "i" : 6 } + block = { "n": 0, # I dont know how to setup main quantum number + "l": [(orb_dict[orb], len(data))], + "coefficients" : [ [ d["exp"], d["cont"] ] for d in b ] } + blocks.append(block) + + for line in fhandle: + """ + Element symbol has to be every block + """ + if re.match("^[A-z ]+$", line): + if len(data) != 0: + block_creator(data, orb) + data = [] + el, orb, = line.lower().split() + if element is None: + """ + TODO check validity of element + """ + element = el + elif element != el: + raise ParsingError(f"Element previous {element}, and now {el}.") # Element cannot be changed + if re.match("^[+-.0-9 ]+$", line): + exp, cont, = [ float(x) for x in line.split() ] + data.append({"exp" : exp, + "cont" : cont }) + if len(data) != 0: + block_creator(data, orb) + data = [] if duplicate_handling == "ignore": # simply filter duplicates #bsets = [bs for bs in bsets if not exists(bs)] @@ -375,7 +415,17 @@ def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", element else: raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") - return [] + basis = {"element" : element.capitalize(), + "version" : 1, + "tags" : [], + "aliases" : ["nwchem"], + "blocks" : blocks } + + if hasattr(fhandle, "name"): + basis["name"] = fhandle.name + basis["aliases"].append(fhandle.name.replace(".nwchem", "")) + + return [cls(**basis)] def to_cp2k(self, fhandle): """ From 24b4c55cc13f02752303e34c8795a16a58e617c5 Mon Sep 17 00:00:00 2001 From: addman Date: Sun, 26 Sep 2021 00:46:54 +0200 Subject: [PATCH 20/47] WIP --- aiida_gaussian_datatypes/basisset/data.py | 1 - aiida_gaussian_datatypes/fetcher/cli.py | 30 +++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py index 9f87a6e..2779c00 100644 --- a/aiida_gaussian_datatypes/basisset/data.py +++ b/aiida_gaussian_datatypes/basisset/data.py @@ -86,7 +86,6 @@ def _validate(self): BasisSetData.from_dict({"identifiers": self.aliases, **self.attributes}) #assert isinstance(self.name, str) and self.name - ic(self.aliases) assert ( isinstance(self.aliases, list) and all(isinstance(alias, str) for alias in self.aliases) diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py index dfc717b..521848e 100644 --- a/aiida_gaussian_datatypes/fetcher/cli.py +++ b/aiida_gaussian_datatypes/fetcher/cli.py @@ -5,6 +5,10 @@ from aiida.cmdline.utils import decorators, echo from aiida.cmdline.commands.cmd_data import verdi_data from ..libraries import * +from ..basisset.data import BasisSet +from ..pseudopotential.data import Pseudopotential + +from ..utils import click_parse_range # pylint: disable=relative-beyond-top-level def _formatted_table_import(elements): """generates a formatted table (using tabulate) for importable basis and PPs""" @@ -86,5 +90,31 @@ def install_family(library): elements = LibraryBookKeeper.get_library_by_name(library).fetch() echo.echo_info(f"Found {len(elements)} elements") echo.echo(_formatted_table_import(elements)) + echo.echo("") + indexes = click.prompt( + "Which Elements do you want to add?" + " ('n' for none, 'a' for all, comma-seperated list or range of numbers)", + value_proc=lambda v: click_parse_range(v, len(elements))) + ic(elements) + ic(indexes) + for idx in indexes: + e = elements[{1: "H"}[idx+1]] + for t, o in e["types"].items(): + for b in o["basis"]: + with open(str(b), "r") as fhandle: + basis, = BasisSet.from_nwchem(fhandle) + echo.echo(f"Adding Basis for: {basis.element} ({basis.name})... ", nl=False) + echo.echo("DONE") + for p in o["pseudos"]: + with open(str(p), "r") as fhandle: + pseudo, = Pseudopotential.from_gamess(fhandle) + echo.echo(f"Adding Pseudo for: {pseudo.element} ({pseudo.name})... ", nl=False) + echo.echo("DONE") + + + # echo.echo_info( + # "Adding Objects for: {p.element} ({p.name})... ".format(p=pseudos[idx]), nl=False) + # pseudos[idx].store() + # echo.echo("DONE") From f3e89c523ce37cb90a72dba8274bbba549eb92aa Mon Sep 17 00:00:00 2001 From: addman Date: Mon, 27 Sep 2021 16:36:27 +0200 Subject: [PATCH 21/47] WIP --- aiida_gaussian_datatypes/basisset/data.py | 14 +-- aiida_gaussian_datatypes/fetcher/cli.py | 101 +++++++++++++++++----- aiida_gaussian_datatypes/libraries.py | 3 +- 3 files changed, 89 insertions(+), 29 deletions(-) diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py index 2779c00..aa59b8c 100644 --- a/aiida_gaussian_datatypes/basisset/data.py +++ b/aiida_gaussian_datatypes/basisset/data.py @@ -18,6 +18,7 @@ ) import re from aiida.orm import Data, Group +from pathlib import Path from icecream import ic @@ -328,7 +329,7 @@ def decimal2str(val): return [cls(**bs) for bs in bsets] @classmethod - def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore"): + def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", name = None): """ Constructs a list with basis set objects from a Basis Set in NWCHEM format @@ -416,13 +417,16 @@ def block_creator(b, orb, blocks = blocks): basis = {"element" : element.capitalize(), "version" : 1, + "name" : "unknown", "tags" : [], - "aliases" : ["nwchem"], + "aliases" : [""], "blocks" : blocks } - if hasattr(fhandle, "name"): - basis["name"] = fhandle.name - basis["aliases"].append(fhandle.name.replace(".nwchem", "")) + if name is not None: + basis["name"] = name + elif hasattr(fhandle, "name"): + basis["name"] = Path(fhandle.name).name.replace(".nwchem", "") + basis["aliases"].append(basis["name"].split(".")[-1]) return [cls(**basis)] diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py index 521848e..5a59087 100644 --- a/aiida_gaussian_datatypes/fetcher/cli.py +++ b/aiida_gaussian_datatypes/fetcher/cli.py @@ -2,13 +2,28 @@ import click import tabulate +from pathlib import Path from aiida.cmdline.utils import decorators, echo from aiida.cmdline.commands.cmd_data import verdi_data +from aiida.orm import load_group from ..libraries import * from ..basisset.data import BasisSet from ..pseudopotential.data import Pseudopotential - -from ..utils import click_parse_range # pylint: disable=relative-beyond-top-level +#from ..groups import ( +# BasisSetGroup, +# PseudopotenialGroup, +#) +from ..groups import BasisSetGroup +from ..groups import PseudopotentialGroup + +#from ..utils import ( +# click_parse_range, # pylint: disable=relative-beyond-top-level +# SYM2NUM, +#) + +from ..utils import click_parse_range +from ..utils import SYM2NUM +from aiida.common.exceptions import UniquenessError def _formatted_table_import(elements): """generates a formatted table (using tabulate) for importable basis and PPs""" @@ -29,7 +44,7 @@ class row(): t = [] @_boldformater - def __new__(cls, num, element, t, p, b): + def __new__(cls, num, element, t, p, tags, b): if element in cls.element: element = "" @@ -51,27 +66,29 @@ def __new__(cls, num, element, t, p, b): if t == "": p = "" + tags = [] return ( num, element, t, p, + " ".join(tags), re.match("[A-z]{1,2}\.(.+).nwchem", b).group(1), b ) table_content = [] - for ii, (e, d) in enumerate(elements.items()): + for ii, (e, d) in enumerate(elements): for t in d["types"]: if len(d["types"][t]["pseudos"]) == 0: continue p = d["types"][t]["pseudos"][0] for b in d["types"][t]["basis"]: - table_content.append(row(ii, e, t, p.name, b.name)) + table_content.append(row(ii, e, t, p.name, d["types"][t]["tags"], b.name)) #table_content = [row(n, p, v) for n, (p, v) in enumerate(elements.items())] - return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "PseudoFile", "Basis", "BasisFile"]) + return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "PseudoFile", "Tags", "Basis", "BasisFile"]) @verdi_data.group("gaussian") def cli(): @@ -87,7 +104,27 @@ def install_family(library): """ Installs a family of pseudo potentials from a remote repository """ + + basissetgname = f"{library}-basis" + try: + basisgroup = load_group(basissetgname) + except: + echo.echo_info("Creating library basis group ... ", nl = False) + basisgroup = BasisSetGroup(basissetgname) + basisgroup.store() + echo.echo("DONE") + + pseudogname = f"{library}-pseudo" + try: + pseudogroup = load_group(pseudogname) + except: + echo.echo_info("Creating library pseudo group ... ", nl = False) + pseudogroup = PseudopotentialGroup(pseudogname) + pseudogroup.store() + echo.echo("DONE") + elements = LibraryBookKeeper.get_library_by_name(library).fetch() + elements = [ [el, p] for el, p in sorted(elements.items(), key = lambda x: SYM2NUM[x[0]]) ] echo.echo_info(f"Found {len(elements)} elements") echo.echo(_formatted_table_import(elements)) echo.echo("") @@ -95,26 +132,44 @@ def install_family(library): "Which Elements do you want to add?" " ('n' for none, 'a' for all, comma-seperated list or range of numbers)", value_proc=lambda v: click_parse_range(v, len(elements))) - ic(elements) - ic(indexes) for idx in indexes: - e = elements[{1: "H"}[idx+1]] - for t, o in e["types"].items(): + e, v = elements[idx] + for t, o in v["types"].items(): for b in o["basis"]: with open(str(b), "r") as fhandle: - basis, = BasisSet.from_nwchem(fhandle) - echo.echo(f"Adding Basis for: {basis.element} ({basis.name})... ", nl=False) - echo.echo("DONE") + try: + basis, = BasisSet.from_nwchem(fhandle, + duplicate_handling = "new", + name = f"{t}.{Path(fhandle.name).name}" + ) + if basis is None: + continue + echo.echo_info(f"Adding Basis for: ", nl=False) + echo.echo(f"{basis.element} ({basis.name})... ", nl=False) + basis.tags.extend(o["tags"]) + basis.store() + basisgroup.add_nodes([basis]) + echo.echo("Imported") + except UniquenessError: + echo.echo("Skipping (already in)") + except Exception as e: + echo.echo("Skipping (something went wrong)") for p in o["pseudos"]: with open(str(p), "r") as fhandle: - pseudo, = Pseudopotential.from_gamess(fhandle) - echo.echo(f"Adding Pseudo for: {pseudo.element} ({pseudo.name})... ", nl=False) - echo.echo("DONE") - - - # echo.echo_info( - # "Adding Objects for: {p.element} ({p.name})... ".format(p=pseudos[idx]), nl=False) - # pseudos[idx].store() - # echo.echo("DONE") - + try: + pseudo, = Pseudopotential.from_gamess(fhandle, + duplicate_handling = "new" + ) + if pseudo is None: + continue + echo.echo_info(f"Adding Basis for: ", nl=False) + echo.echo(f"{pseudo.element} ({pseudo.name})... ", nl=False) + pseudo.tags.extend(o["tags"]) + pseudo.store() + pseudogroup.add_nodes([pseudo]) + echo.echo("Imported") + except UniquenessError: + echo.echo("Skipping (already in)") + except Exception as e: + echo.echo("Skipping (something went wrong)") diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py index 039ee43..d7aeaed 100644 --- a/aiida_gaussian_datatypes/libraries.py +++ b/aiida_gaussian_datatypes/libraries.py @@ -82,7 +82,8 @@ def add_row(p, elements = elements): if typ not in elements[element]["types"]: elements[element]["types"][typ] = {"path": typ_path, "basis": [], - "pseudos": []} + "pseudos": [], + "tags": ["ECP", typ, ]} elements[element]["types"][typ][nature].append(p) From 843dbb614130d3600fdd27f0a020631f91d21732 Mon Sep 17 00:00:00 2001 From: addman Date: Tue, 28 Sep 2021 10:57:02 +0200 Subject: [PATCH 22/47] WIP --- aiida_gaussian_datatypes/fetcher/cli.py | 11 +++-- aiida_gaussian_datatypes/libraries.py | 46 +++++++++++++++++-- .../pseudopotential/cli.py | 6 ++- .../pseudopotential/data.py | 23 +++++++++- 4 files changed, 74 insertions(+), 12 deletions(-) diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py index 5a59087..4d684df 100644 --- a/aiida_gaussian_datatypes/fetcher/cli.py +++ b/aiida_gaussian_datatypes/fetcher/cli.py @@ -85,7 +85,7 @@ def __new__(cls, num, element, t, p, tags, b): continue p = d["types"][t]["pseudos"][0] for b in d["types"][t]["basis"]: - table_content.append(row(ii, e, t, p.name, d["types"][t]["tags"], b.name)) + table_content.append(row(ii, e, t, p["path"].name, d["types"][t]["tags"], b["path"].name)) #table_content = [row(n, p, v) for n, (p, v) in enumerate(elements.items())] return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "PseudoFile", "Tags", "Basis", "BasisFile"]) @@ -124,6 +124,7 @@ def install_family(library): echo.echo("DONE") elements = LibraryBookKeeper.get_library_by_name(library).fetch() + elements = [ [el, p] for el, p in sorted(elements.items(), key = lambda x: SYM2NUM[x[0]]) ] echo.echo_info(f"Found {len(elements)} elements") echo.echo(_formatted_table_import(elements)) @@ -147,8 +148,8 @@ def install_family(library): echo.echo_info(f"Adding Basis for: ", nl=False) echo.echo(f"{basis.element} ({basis.name})... ", nl=False) basis.tags.extend(o["tags"]) - basis.store() - basisgroup.add_nodes([basis]) + #basis.store() + #basisgroup.add_nodes([basis]) echo.echo("Imported") except UniquenessError: echo.echo("Skipping (already in)") @@ -165,8 +166,8 @@ def install_family(library): echo.echo_info(f"Adding Basis for: ", nl=False) echo.echo(f"{pseudo.element} ({pseudo.name})... ", nl=False) pseudo.tags.extend(o["tags"]) - pseudo.store() - pseudogroup.add_nodes([pseudo]) + #pseudo.store() + #pseudogroup.add_nodes([pseudo]) echo.echo("Imported") except UniquenessError: echo.echo("Skipping (already in)") diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py index d7aeaed..912d10b 100644 --- a/aiida_gaussian_datatypes/libraries.py +++ b/aiida_gaussian_datatypes/libraries.py @@ -4,7 +4,6 @@ # Was there really a fish # That grants you that kind of wish # - import os import re import git @@ -13,6 +12,8 @@ from aiida_gaussian_datatypes import utils from typing import Dict, Generic, List, Optional, Sequence, Type, TypeVar from icecream import ic +from .basisset.data import BasisSet +from .pseudopotential.data import Pseudopotential class LibraryBookKeeper: @@ -84,8 +85,35 @@ def add_row(p, elements = elements): "basis": [], "pseudos": [], "tags": ["ECP", typ, ]} - - elements[element]["types"][typ][nature].append(p) + val = {} + val["path"] = p + with open(p, "r") as fhandle: + if nature == "basis": + try: + obj, = BasisSet.from_nwchem(fhandle, + duplicate_handling = "new") + except: + """ + Something went wrong in the import, continuing ... + """ + return + tags = ["aug"] + elif nature == "pseudos": + try: + obj, = Pseudopotential.from_gamess(fhandle, + duplicate_handling = "new") + except: + """ + Something went wrong in the import, continuing ... + """ + return + tags = [] + else: + raise # TODO give here an error + obj.tags.extend(tags) + val["obj"] = obj + val["tags"] = tags + elements[element]["types"][typ][nature].append(val) tempdir = pathlib.Path(tempfile.mkdtemp()) @@ -95,5 +123,17 @@ def add_row(p, elements = elements): if str(p.name).lower().endswith(".gamess") or str(p.name).lower().endswith(".nwchem"): add_row(p) + """ Update valence electrons """ + for e in elements: + for t in elements[e]["types"]: + if len(elements[e]["types"][t]["pseudos"]) == 1: + tags = [f'q{elements[e]["types"][t]["pseudos"][0]["obj"].n_el_tot}', + f'c{elements[e]["types"][t]["pseudos"][0]["obj"].core_electrons}' + ] + elements[e]["types"][t]["tags"].extend(tags) + for ii, b in enumerate(elements[e]["types"][t]["basis"]): + elements[e]["types"][t]["basis"][ii]["obj"].n_el = elements[e]["types"][t]["pseudos"][0]["obj"].n_el_tot + + return elements diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py index f538c3a..32eb64c 100644 --- a/aiida_gaussian_datatypes/pseudopotential/cli.py +++ b/aiida_gaussian_datatypes/pseudopotential/cli.py @@ -33,11 +33,12 @@ def row(num, pseudo): _names_column(pseudo.name, pseudo.aliases), ", ".join(pseudo.tags), ", ".join(f"{n:2d}" for n in pseudo.n_el + (3 - len(pseudo.n_el)) * [0]), + pseudo.n_el_tot, pseudo.version, ) table_content = [row(n, p) for n, p in enumerate(pseudos)] - return tabulate.tabulate(table_content, headers=["Nr.", "Type", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Version"]) + return tabulate.tabulate(table_content, headers=["Nr.", "Type", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Tot. val. e⁻", "Version"]) def _formatted_table_list(pseudos): @@ -51,11 +52,12 @@ def row(pseudo): _names_column(pseudo.name, pseudo.aliases), ", ".join(pseudo.tags), ", ".join(f"{n:2d}" for n in pseudo.n_el + (3 - len(pseudo.n_el)) * [0]), + pseudo.n_el_tot, pseudo.version, ) table_content = [row(p) for p in pseudos] - return tabulate.tabulate(table_content, headers=["ID", "Type", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Version"]) + return tabulate.tabulate(table_content, headers=["Nr.", "Type", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Tot. val. e⁻", "Version"]) @verdi_data.group("gaussian.pseudo") diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index c48bf6c..6b1c3ba 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -7,6 +7,7 @@ """ import dataclasses +from ..utils import SYM2NUM from decimal import Decimal from icecream import ic @@ -34,6 +35,7 @@ def __init__( aliases=None, tags=None, n_el=None, + n_el_tot=None, version=1, **kwargs, ): @@ -53,13 +55,18 @@ def __init__( if not n_el: n_el = [] + else: + if not n_el_tot: + n_el_tot = sum(n_el) + else: + raise #TODO a propiate error here if "label" not in kwargs: kwargs["label"] = name super().__init__(**kwargs) - for attr in ("name", "element", "tags", "aliases", "n_el", "version"): + for attr in ("name", "element", "tags", "aliases", "n_el", "n_el_tot", "version"): self.set_attribute(attr, locals()[attr]) def store(self, *args, **kwargs): @@ -94,6 +101,8 @@ def _validate(self): ) assert isinstance(self.tags, list) and all(isinstance(tag, str) for tag in self.tags) assert isinstance(self.version, int) and self.version > 0 + if len(self.n_el) != 0: + assert(sum(self.n_el) == self.n_el_tot) except Exception as exc: raise ValidationError("One or more invalid fields found") from exc @@ -151,6 +160,15 @@ def n_el(self): return self.get_attribute("n_el", []) + @property + def n_el_tot(self): + """ + Return the number of electrons per angular momentum + :rtype:int + """ + + return self.get_attribute("n_el_tot", []) + @classmethod def get(cls, element, name=None, version="latest", match_aliases=True, group_label=None, n_el=None): """ @@ -362,7 +380,8 @@ def exists(pseudo): "core_electrons" : core_electrons, "lmax" : lmax, "version" : 1, - "n_el" : None} + "n_el" : None, + "n_el_tot" : SYM2NUM[element] - core_electrons} if duplicate_handling == "ignore": # simply filter duplicates if exists(data): From 61cf7eb91f5be86f6dc268aa8e87b64058151fad Mon Sep 17 00:00:00 2001 From: addman Date: Tue, 28 Sep 2021 12:27:47 +0200 Subject: [PATCH 23/47] WIP --- aiida_gaussian_datatypes/basisset/data.py | 37 ++++++--- aiida_gaussian_datatypes/fetcher/cli.py | 61 ++++++--------- aiida_gaussian_datatypes/libraries.py | 93 +++++++++-------------- 3 files changed, 87 insertions(+), 104 deletions(-) diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py index aa59b8c..adcc9f9 100644 --- a/aiida_gaussian_datatypes/basisset/data.py +++ b/aiida_gaussian_datatypes/basisset/data.py @@ -329,7 +329,7 @@ def decimal2str(val): return [cls(**bs) for bs in bsets] @classmethod - def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", name = None): + def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", attrs = None): """ Constructs a list with basis set objects from a Basis Set in NWCHEM format @@ -348,6 +348,9 @@ def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", name = data = [] blocks = [] + if not attrs: + attrs = {} + def block_creator(b, orb, blocks = blocks): orb_dict = {"s" : 0, "p" : 1, @@ -415,19 +418,31 @@ def block_creator(b, orb, blocks = blocks): else: raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") - basis = {"element" : element.capitalize(), - "version" : 1, - "name" : "unknown", - "tags" : [], - "aliases" : [""], - "blocks" : blocks } - - if name is not None: - basis["name"] = name - elif hasattr(fhandle, "name"): + try: + basis = {"element" : element.capitalize(), + "version" : 1, + "name" : "unknown", + "tags" : [], + "aliases" : [], + "blocks" : blocks } + except: + return [] + + if hasattr(fhandle, "name"): basis["name"] = Path(fhandle.name).name.replace(".nwchem", "") basis["aliases"].append(basis["name"].split(".")[-1]) + if "name" in attrs: + basis["aliases"].append(basis["name"]) + basis["name"] = attrs["name"] + + for attr in ("n_el", "tags",): + if attr in attrs: + basis[attr] = attrs[attr] + + if len(basis["aliases"]) == 0: + del basis["aliases"] + return [cls(**basis)] def to_cp2k(self, fhandle): diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py index 4d684df..c427833 100644 --- a/aiida_gaussian_datatypes/fetcher/cli.py +++ b/aiida_gaussian_datatypes/fetcher/cli.py @@ -85,7 +85,9 @@ def __new__(cls, num, element, t, p, tags, b): continue p = d["types"][t]["pseudos"][0] for b in d["types"][t]["basis"]: - table_content.append(row(ii, e, t, p["path"].name, d["types"][t]["tags"], b["path"].name)) + table_content.append(row(ii, e, t, p["path"].name, + d["types"][t]["tags"], + b["path"].name)) #table_content = [row(n, p, v) for n, (p, v) in enumerate(elements.items())] return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "PseudoFile", "Tags", "Basis", "BasisFile"]) @@ -137,40 +139,27 @@ def install_family(library): e, v = elements[idx] for t, o in v["types"].items(): for b in o["basis"]: - with open(str(b), "r") as fhandle: - try: - basis, = BasisSet.from_nwchem(fhandle, - duplicate_handling = "new", - name = f"{t}.{Path(fhandle.name).name}" - ) - if basis is None: - continue - echo.echo_info(f"Adding Basis for: ", nl=False) - echo.echo(f"{basis.element} ({basis.name})... ", nl=False) - basis.tags.extend(o["tags"]) - #basis.store() - #basisgroup.add_nodes([basis]) - echo.echo("Imported") - except UniquenessError: - echo.echo("Skipping (already in)") - except Exception as e: - echo.echo("Skipping (something went wrong)") + basis = b["obj"] + echo.echo_info(f"Adding Basis for: ", nl=False) + echo.echo(f"{basis.element} ({basis.name})... ", nl=False) + try: + basis.store() + basisgroup.add_nodes([basis]) + echo.echo("Imported") + except UniquenessError: + echo.echo("Skipping (already in)") + except Exception as e: + echo.echo("Skipping (something went wrong)") for p in o["pseudos"]: - with open(str(p), "r") as fhandle: - try: - pseudo, = Pseudopotential.from_gamess(fhandle, - duplicate_handling = "new" - ) - if pseudo is None: - continue - echo.echo_info(f"Adding Basis for: ", nl=False) - echo.echo(f"{pseudo.element} ({pseudo.name})... ", nl=False) - pseudo.tags.extend(o["tags"]) - #pseudo.store() - #pseudogroup.add_nodes([pseudo]) - echo.echo("Imported") - except UniquenessError: - echo.echo("Skipping (already in)") - except Exception as e: - echo.echo("Skipping (something went wrong)") + pseudo = p["obj"] + echo.echo_info(f"Adding Basis for: ", nl=False) + echo.echo(f"{pseudo.element} ({pseudo.name})... ", nl=False) + try: + pseudo.store() + pseudogroup.add_nodes([pseudo]) + echo.echo("Imported") + except UniquenessError: + echo.echo("Skipping (already in)") + except Exception as e: + echo.echo("Skipping (something went wrong)") diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py index 912d10b..a8c6ac1 100644 --- a/aiida_gaussian_datatypes/libraries.py +++ b/aiida_gaussian_datatypes/libraries.py @@ -57,7 +57,7 @@ class MitasLibrary(_ExternalLibrary): def fetch(cls): elements = {} - def add_row(p, elements = elements): + def add_data(p, elements = elements): element = str(p.parent.parent.name) if element not in utils.SYM2NUM: # Check if element is valid return @@ -66,15 +66,36 @@ def add_row(p, elements = elements): typ = str(p.parent.name) typ_path = str(p.parent.name) - if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.nwchem", p.name): - nature = "basis" - elif re.match("[A-z]{1,2}\.ccECP\.gamess", p.name): - nature = "pseudos" - else: - """ - If does not match these regexes do nothing - """ - return + tags = ["ECP", typ, ] + + """ Load Pseudopotential first """ + with open(p, "r") as fhandle: + pseudo, = Pseudopotential.from_gamess(fhandle, + duplicate_handling = "new") + tags.append(f"q{pseudo.n_el_tot}") + tags.append(f"c{pseudo.core_electrons}") + pseudo.tags.extend(tags) + + pseudos = [{"path": p, + "obj": pseudo}] + + + """ Load Basis sets """ + basis = [] + for r in (p.parent).glob("**/*"): + if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.nwchem", r.name): + name = re.match("[A-z]{1,2}\.([A-z\-]*cc-.*)\.nwchem", r.name).group(1) + name = f"{typ}-{name}" + with open(r, "r") as fhandle: + b = BasisSet.from_nwchem(fhandle, + duplicate_handling = "new", + attrs = {"n_el": pseudo.n_el_tot, + "name": name, + "tags": tags}) + if len(b) == 0: continue + b, = b + basis.append({"path": r, + "obj": b}) if element not in elements: elements[element] = {"path": element_path, @@ -82,58 +103,16 @@ def add_row(p, elements = elements): if typ not in elements[element]["types"]: elements[element]["types"][typ] = {"path": typ_path, - "basis": [], - "pseudos": [], - "tags": ["ECP", typ, ]} - val = {} - val["path"] = p - with open(p, "r") as fhandle: - if nature == "basis": - try: - obj, = BasisSet.from_nwchem(fhandle, - duplicate_handling = "new") - except: - """ - Something went wrong in the import, continuing ... - """ - return - tags = ["aug"] - elif nature == "pseudos": - try: - obj, = Pseudopotential.from_gamess(fhandle, - duplicate_handling = "new") - except: - """ - Something went wrong in the import, continuing ... - """ - return - tags = [] - else: - raise # TODO give here an error - obj.tags.extend(tags) - val["obj"] = obj - val["tags"] = tags - elements[element]["types"][typ][nature].append(val) - + "basis": basis, + "pseudos": pseudos, + "tags": tags} tempdir = pathlib.Path(tempfile.mkdtemp()) git.Repo.clone_from(cls._URL, tempdir) for p in (tempdir/"recipes").glob("**/*"): - if str(p.name).lower().endswith(".gamess") or str(p.name).lower().endswith(".nwchem"): - add_row(p) - - """ Update valence electrons """ - for e in elements: - for t in elements[e]["types"]: - if len(elements[e]["types"][t]["pseudos"]) == 1: - tags = [f'q{elements[e]["types"][t]["pseudos"][0]["obj"].n_el_tot}', - f'c{elements[e]["types"][t]["pseudos"][0]["obj"].core_electrons}' - ] - elements[e]["types"][t]["tags"].extend(tags) - for ii, b in enumerate(elements[e]["types"][t]["basis"]): - elements[e]["types"][t]["basis"][ii]["obj"].n_el = elements[e]["types"][t]["pseudos"][0]["obj"].n_el_tot - + if re.match("[A-z]{1,2}\.ccECP\.gamess", p.name): + add_data(p) return elements From b1c13f5d56b62e8f915b40096b7266e9b38a8038 Mon Sep 17 00:00:00 2001 From: addman Date: Wed, 29 Sep 2021 09:40:08 +0200 Subject: [PATCH 24/47] Add NWCHEM writer for basis --- aiida_gaussian_datatypes/basisset/cli.py | 3 ++- aiida_gaussian_datatypes/basisset/data.py | 31 ++++++++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/aiida_gaussian_datatypes/basisset/cli.py b/aiida_gaussian_datatypes/basisset/cli.py index 7d1c91c..22fc0e5 100644 --- a/aiida_gaussian_datatypes/basisset/cli.py +++ b/aiida_gaussian_datatypes/basisset/cli.py @@ -175,7 +175,7 @@ def list_basisset(sym, name, tags): help="filter by name") @click.option('tags', '--tag', '-t', multiple=True, help="filter by a tag (all tags must be present if specified multiple times)") -@click.option('output_format', '-f', '--format', type=click.Choice(['cp2k', ]), default='cp2k', +@click.option('output_format', '-f', '--format', type=click.Choice(['cp2k', 'nwchem']), default='cp2k', help="Chose the output format for the basiset: " + ', '.join(['cp2k', ])) # fmt: on @decorators.with_dbenv() @@ -190,6 +190,7 @@ def dump_basisset(sym, name, tags, output_format, data): writers = { "cp2k": BasisSet.to_cp2k, + "nwchem" : BasisSet.to_nwchem, } if data: diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py index adcc9f9..e903785 100644 --- a/aiida_gaussian_datatypes/basisset/data.py +++ b/aiida_gaussian_datatypes/basisset/data.py @@ -342,6 +342,8 @@ def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", attrs = """ NWCHEM parser + + TODO Maybe parser should move to "parsers" """ element = None @@ -360,7 +362,7 @@ def block_creator(b, orb, blocks = blocks): "h" : 5, "i" : 6 } block = { "n": 0, # I dont know how to setup main quantum number - "l": [(orb_dict[orb], len(data))], + "l": [(orb_dict[orb], 1)], "coefficients" : [ [ d["exp"], d["cont"] ] for d in b ] } blocks.append(block) @@ -458,6 +460,33 @@ def to_cp2k(self, fhandle): fhandle.write(line) fhandle.write("\n") + def to_nwchem(self, fhandle): + """ + Write the Basis Set to the passed file handle in the format expected by NWCHEM. + + :param fhandle: A valid output file handle + """ + orb_dict = {0 : "s", + 1 : "p", + 2 : "d", + 3 : "f", + 4 : "g", + 5 : "h", + 6 : "i" } + + fhandle.write(f"# from AiiDA BasisSet\n") + for block in self.blocks: + offset = 0 + for orb, num, in block["l"]: + fhandle.write(f"{self.element} {orb_dict[orb]}\n") + for lnum in range(num): + for entry in block["coefficients"]: + exponent = entry[0] + coefficient = entry[1 + lnum + offset] + fhandle.write(f" {exponent:15.7f} {coefficient:15.7f}\n") + offset = num + + def get_matching_pseudopotential(self, *args, **kwargs): """ Get a pseudopotential matching this basis set by at least element and number of valence electrons. From c4d6c60984bcb6f1261d01f82c900c2808884629 Mon Sep 17 00:00:00 2001 From: addman Date: Wed, 29 Sep 2021 09:49:45 +0200 Subject: [PATCH 25/47] Add GAMESS format writer for basis --- aiida_gaussian_datatypes/basisset/cli.py | 3 ++- aiida_gaussian_datatypes/basisset/data.py | 26 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/aiida_gaussian_datatypes/basisset/cli.py b/aiida_gaussian_datatypes/basisset/cli.py index 22fc0e5..a299782 100644 --- a/aiida_gaussian_datatypes/basisset/cli.py +++ b/aiida_gaussian_datatypes/basisset/cli.py @@ -175,7 +175,7 @@ def list_basisset(sym, name, tags): help="filter by name") @click.option('tags', '--tag', '-t', multiple=True, help="filter by a tag (all tags must be present if specified multiple times)") -@click.option('output_format', '-f', '--format', type=click.Choice(['cp2k', 'nwchem']), default='cp2k', +@click.option('output_format', '-f', '--format', type=click.Choice(['cp2k', 'nwchem', 'gamess',]), default='cp2k', help="Chose the output format for the basiset: " + ', '.join(['cp2k', ])) # fmt: on @decorators.with_dbenv() @@ -191,6 +191,7 @@ def dump_basisset(sym, name, tags, output_format, data): writers = { "cp2k": BasisSet.to_cp2k, "nwchem" : BasisSet.to_nwchem, + "gamess" : BasisSet.to_gamess, } if data: diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py index e903785..d175269 100644 --- a/aiida_gaussian_datatypes/basisset/data.py +++ b/aiida_gaussian_datatypes/basisset/data.py @@ -486,6 +486,32 @@ def to_nwchem(self, fhandle): fhandle.write(f" {exponent:15.7f} {coefficient:15.7f}\n") offset = num + def to_gamess(self, fhandle): + """ + Write the Basis Set to the passed file handle in the format expected by GAMESS. + + :param fhandle: A valid output file handle + """ + orb_dict = {0 : "s", + 1 : "p", + 2 : "d", + 3 : "f", + 4 : "g", + 5 : "h", + 6 : "i" } + + fhandle.write(f"# from AiiDA BasisSet\n") + for block in self.blocks: + offset = 0 + for orb, num, in block["l"]: + fhandle.write(f" {orb_dict[orb].upper()} {len(block['coefficients'])}\n") + for lnum in range(num): + for ii, entry in enumerate(block["coefficients"]): + exponent = entry[0] + coefficient = entry[1 + lnum + offset] + fhandle.write(f" {ii + 1:3d} {exponent:15.7f} {coefficient:15.7f}\n") + offset = num + def get_matching_pseudopotential(self, *args, **kwargs): """ From 0c367900156d7cfcb6796cc0cc4fe6e1a2b3110e Mon Sep 17 00:00:00 2001 From: addman Date: Thu, 30 Sep 2021 14:51:22 +0200 Subject: [PATCH 26/47] Add non unique version of BasisSet for basis set operations --- aiida_gaussian_datatypes/basisset/data.py | 54 +++++++++++++++-------- setup.json | 4 ++ 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py index d175269..b6d6e6a 100644 --- a/aiida_gaussian_datatypes/basisset/data.py +++ b/aiida_gaussian_datatypes/basisset/data.py @@ -22,7 +22,7 @@ from icecream import ic -class BasisSet(Data): +class BasisSetCommon(Data): """ Provide a general way to store GTO basis sets from different codes within the AiiDA framework. """ @@ -49,7 +49,7 @@ def __init__(self, element=None, name=None, aliases=None, tags=None, n_el=None, if "label" not in kwargs: kwargs["label"] = name - super(BasisSet, self).__init__(**kwargs) + super(BasisSetCommon, self).__init__(**kwargs) self.set_attribute("name", name) self.set_attribute("element", element) @@ -60,25 +60,10 @@ def __init__(self, element=None, name=None, aliases=None, tags=None, n_el=None, self.set_attribute("version", version) def store(self, *args, **kwargs): - """ - Store the node, ensuring that the combination (element,name,version) is unique. - """ - # TODO: this uniqueness check is not race-condition free. - - try: - existing = self.get(self.element, self.name, self.version, match_aliases=False) - except NotExistent: - pass - else: - raise UniquenessError( - f"Gaussian Basis Set already exists for" - f" element={self.element}, name={self.name}, version={self.version}: {existing.uuid}" - ) - - return super(BasisSet, self).store(*args, **kwargs) + return super(BasisSetCommon, self).store(*args, **kwargs) def _validate(self): - super(BasisSet, self)._validate() + super(BasisSetCommon, self)._validate() from cp2k_input_tools.basissets import BasisSetData @@ -524,3 +509,34 @@ def get_matching_pseudopotential(self, *args, **kwargs): return Pseudopotential.get(element=self.element, n_el=self.n_el, *args, **kwargs) else: return Pseudopotential.get(element=self.element, *args, **kwargs) + +class BasisSet(BasisSetCommon): + + def __init__(self, *args, **kwargs): + super(BasisSet, self).__init__(*args, **kwargs) + + def store(self, *args, **kwargs): + """ + Store the node, ensuring that the combination (element,name,version) is unique. + """ + # TODO: this uniqueness check is not race-condition free. + + try: + existing = self.get(self.element, self.name, self.version, match_aliases=False) + except NotExistent: + pass + else: + raise UniquenessError( + f"Gaussian Basis Set already exists for" + f" element={self.element}, name={self.name}, version={self.version}: {existing.uuid}" + ) + + return super(BasisSet, self).store(*args, **kwargs) + +class BasisSetFree(BasisSetCommon): + + def __init__(self, *args, **kwargs): + super(BasisSetFree, self).__init__(*args, **kwargs) + + def store(self, *args, **kwargs): + return super(BasisSetFree, self).store(*args, **kwargs) diff --git a/setup.json b/setup.json index 6a20664..b4d404b 100644 --- a/setup.json +++ b/setup.json @@ -22,6 +22,7 @@ "entry_points": { "aiida.data": [ "gaussian.basisset = aiida_gaussian_datatypes.basisset.data:BasisSet", + "gaussian.basissetfree = aiida_gaussian_datatypes.basisset.data:BasisSetFree", "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.data:Pseudopotential", "gaussian.pseudo.gthpseudopotential = aiida_gaussian_datatypes.pseudopotential.data:GTHPseudopotential", "gaussian.pseudo.ecppseudopotential = aiida_gaussian_datatypes.pseudopotential.data:ECPPseudopotential" @@ -34,6 +35,9 @@ "aiida.groups": [ "gaussian.basisset = aiida_gaussian_datatypes.groups:BasisSetGroup", "gaussian.pseudo = aiida_gaussian_datatypes.groups:PseudopotentialGroup" + ], + "aiida.calculations": [ + "gaussian.uncontract = aiida_gaussian_datatypes.calc.uncontract:uncontract" ] }, "scripts": [], From 99b9b88e84418b1d45b5ab2d9bc6c1a22f7a7413 Mon Sep 17 00:00:00 2001 From: addman Date: Thu, 30 Sep 2021 14:51:56 +0200 Subject: [PATCH 27/47] Change the order of stored function. In Gamess format the order is unintuitive first block is he upper most angular momentum and after goes the s, p, d ... --- aiida_gaussian_datatypes/pseudopotential/data.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index 6b1c3ba..3dffb0f 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -365,6 +365,14 @@ def exists(pseudo): """ functions[-1]["polynoms"] = [ int(x) for x in functions[-1]["polynoms"] ] + """ + Change the order of functions so they match orbital momentum + + In GAMESS format first block represents upper most lmax + and then the rest s, p, d, ... + """ + functions = functions[1:] + [functions[0]] + """ TODO properly extract name """ @@ -433,7 +441,8 @@ def to_gamess(self, fhandle): if isinstance(self, ECPPseudopotential): fhandle.write(f"{self.name} GEN {self.core_electrons} {self.lmax}\n") - for fun in self.functions: + functions = [self.functions[-1]] + self.functions[:-1] + for fun in functions: fhandle.write(f"{len(fun)}\n") for prefactor, polynom, exponent in zip(*[ fun[k] for k in ("prefactors", "polynoms", "exponents")]): fhandle.write(f"{prefactor:10.7f} {polynom:d} {exponent:10.7f}\n") From c722b7dfa2d8ced11a6cab1dd86e16e99b382787 Mon Sep 17 00:00:00 2001 From: addman Date: Thu, 30 Sep 2021 15:33:27 +0200 Subject: [PATCH 28/47] Add writer for turborvb format, fix an error in gamess format writer --- .../pseudopotential/cli.py | 4 ++- .../pseudopotential/data.py | 27 +++++++++++++++++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py index 32eb64c..3bfb491 100644 --- a/aiida_gaussian_datatypes/pseudopotential/cli.py +++ b/aiida_gaussian_datatypes/pseudopotential/cli.py @@ -184,7 +184,8 @@ def list_pseudo(sym, name, tags): @click.option('tags', '--tag', '-t', multiple=True, help="filter by a tag (all tags must be present if specified multiple times)") @click.option('output_format', '-f', '--format', type=click.Choice(['cp2k', - 'gamess']), default='cp2k', + 'gamess', + 'turborvb']), default='cp2k', help="Chose the output format for the pseudopotentials: " + ', '.join(['cp2k', ])) @decorators.with_dbenv() # fmt: on @@ -200,6 +201,7 @@ def dump_pseudo(sym, name, tags, output_format, data): writers = { "cp2k": Pseudopotential.to_cp2k, "gamess": Pseudopotential.to_gamess, + "turborvb": Pseudopotential.to_turborvb, } if data: diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index 3dffb0f..b12f3d8 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -443,9 +443,32 @@ def to_gamess(self, fhandle): fhandle.write(f"{self.name} GEN {self.core_electrons} {self.lmax}\n") functions = [self.functions[-1]] + self.functions[:-1] for fun in functions: - fhandle.write(f"{len(fun)}\n") + fhandle.write(f"{len(fun['polynoms'])}\n") for prefactor, polynom, exponent in zip(*[ fun[k] for k in ("prefactors", "polynoms", "exponents")]): - fhandle.write(f"{prefactor:10.7f} {polynom:d} {exponent:10.7f}\n") + fhandle.write(f"{prefactor:12.7f} {polynom:4d} {exponent:12.7f}\n") + + + else: + """ + make an error + """ + pass + + def to_turborvb(self, fhandle): + """ + Write this Pseudopotential instance to a file in TurboRVB format. + + :param fhandle: open file handle + """ + + if isinstance(self, ECPPseudopotential): + fhandle.write(f"GEN\n") + fhandle.write(f"1 0 {self.lmax}\n") + fhandle.write(" ".join([ f"{len(x['polynoms'])}" for x in self.functions ])) + fhandle.write("\n") + for fun in self.functions: + for prefactor, polynom, exponent in zip(*[ fun[k] for k in ("prefactors", "polynoms", "exponents")]): + fhandle.write(f"{prefactor:12.7f} {polynom:4d} {exponent:12.7f}\n") else: From cdfd800415b7b853d24b0371bce77486e1b30fed Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 1 Oct 2021 08:20:05 +0200 Subject: [PATCH 29/47] Add job for uncontraction --- aiida_gaussian_datatypes/calc/__init__py | 0 aiida_gaussian_datatypes/calc/uncontract.py | 33 +++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 aiida_gaussian_datatypes/calc/__init__py create mode 100644 aiida_gaussian_datatypes/calc/uncontract.py diff --git a/aiida_gaussian_datatypes/calc/__init__py b/aiida_gaussian_datatypes/calc/__init__py new file mode 100644 index 0000000..e69de29 diff --git a/aiida_gaussian_datatypes/calc/uncontract.py b/aiida_gaussian_datatypes/calc/uncontract.py new file mode 100644 index 0000000..cdc7124 --- /dev/null +++ b/aiida_gaussian_datatypes/calc/uncontract.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- + +from aiida.plugins import DataFactory +from aiida.engine import calcfunction +from icecream import ic +""" + +""" + +BasisSet = DataFactory("gaussian.basisset") +BasisSetFree = DataFactory("gaussian.basissetfree") + +@calcfunction +def uncontract(basisset): + """ + + """ + def disassemble(block): + n = block["n"] + l = block["l"] + for exp, cont in block["coefficients"]: + yield {"n" : n, + "l" : l, + "coefficients": [[exp, 1.0]]} + attr = basisset.attributes + blocks = [] + for block in attr["blocks"]: + blocks.extend([ b for b in disassemble(block) ]) + attr["blocks"] = blocks + attr["name"] += "-uncont" + ret = BasisSetFree(**attr) + return ret + From 4957c50d762b8bce762f144893940193fa92946e Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 1 Oct 2021 09:23:54 +0200 Subject: [PATCH 30/47] Add Uniqness test to basis sets imported via from_gamess --- aiida_gaussian_datatypes/basisset/data.py | 57 +++++++++++------------ 1 file changed, 27 insertions(+), 30 deletions(-) diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py index b6d6e6a..c348ecb 100644 --- a/aiida_gaussian_datatypes/basisset/data.py +++ b/aiida_gaussian_datatypes/basisset/data.py @@ -324,6 +324,13 @@ def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", attrs = :rtype: list """ + def exists(bset): + try: + cls.get(bset["element"], bset["name"], match_aliases=False) + except NotExistent: + return False + + return True """ NWCHEM parser @@ -375,36 +382,6 @@ def block_creator(b, orb, blocks = blocks): block_creator(data, orb) data = [] - if duplicate_handling == "ignore": # simply filter duplicates - #bsets = [bs for bs in bsets if not exists(bs)] - pass - - elif duplicate_handling == "error": - #for bset in bsets: - # try: - # latest = cls.get(bset["element"], bset["name"], match_aliases=False) - # except NotExistent: - # pass - # else: - # raise UniquenessError( - # f"Gaussian Basis Set already exists for" - # f" element={bset['element']}, name={bset['name']}: {latest.uuid}" - # ) - pass - - elif duplicate_handling == "new": - #for bset in bsets: - # try: - # latest = cls.get(bset["element"], bset["name"], match_aliases=False) - # except NotExistent: - # pass - # else: - # bset["version"] = latest.version + 1 - pass - - else: - raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") - try: basis = {"element" : element.capitalize(), "version" : 1, @@ -430,6 +407,26 @@ def block_creator(b, orb, blocks = blocks): if len(basis["aliases"]) == 0: del basis["aliases"] + if duplicate_handling == "ignore": # simply filter duplicates + if exists(basis): + return [] + + elif duplicate_handling == "error": + if exists(basis): + raise UniquenessError( f"Gaussian Basis Set already exists for" + f" element={basis['element']}, name={basis['name']}: {latest.uuid}") + + elif duplicate_handling == "new": + try: + latest = cls.get(basis["element"], basis["name"], match_aliases=False) + except NotExistent: + pass + else: + basis["version"] = latest.version + 1 + + else: + raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") + return [cls(**basis)] def to_cp2k(self, fhandle): From b9fd2a698096314bff915c3cba736f9eab415bcc Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 1 Oct 2021 09:29:01 +0200 Subject: [PATCH 31/47] Rename MitasLibrary to QmcpackLibrary --- aiida_gaussian_datatypes/libraries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py index a8c6ac1..8829b23 100644 --- a/aiida_gaussian_datatypes/libraries.py +++ b/aiida_gaussian_datatypes/libraries.py @@ -49,7 +49,7 @@ class EmptyLibrary(_ExternalLibrary): pass @LibraryBookKeeper.register_library -class MitasLibrary(_ExternalLibrary): +class QmcpackLibrary(_ExternalLibrary): _URL = "https://github.com/QMCPACK/pseudopotentiallibrary.git" From 7283a7319a643ccfe7785e76ea85616b930433c1 Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 1 Oct 2021 09:48:20 +0200 Subject: [PATCH 32/47] Add force-ignore option --- aiida_gaussian_datatypes/basisset/data.py | 5 ++++- aiida_gaussian_datatypes/libraries.py | 5 ++--- aiida_gaussian_datatypes/pseudopotential/data.py | 5 ++++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py index c348ecb..51ab2c9 100644 --- a/aiida_gaussian_datatypes/basisset/data.py +++ b/aiida_gaussian_datatypes/basisset/data.py @@ -407,7 +407,10 @@ def block_creator(b, orb, blocks = blocks): if len(basis["aliases"]) == 0: del basis["aliases"] - if duplicate_handling == "ignore": # simply filter duplicates + if duplicate_handling == "force-ignore": # It will check at the store stage + pass + + elif duplicate_handling == "ignore": # simply filter duplicates if exists(basis): return [] diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py index 8829b23..0d4c3c1 100644 --- a/aiida_gaussian_datatypes/libraries.py +++ b/aiida_gaussian_datatypes/libraries.py @@ -71,7 +71,7 @@ def add_data(p, elements = elements): """ Load Pseudopotential first """ with open(p, "r") as fhandle: pseudo, = Pseudopotential.from_gamess(fhandle, - duplicate_handling = "new") + duplicate_handling = "force-ignore") tags.append(f"q{pseudo.n_el_tot}") tags.append(f"c{pseudo.core_electrons}") pseudo.tags.extend(tags) @@ -79,7 +79,6 @@ def add_data(p, elements = elements): pseudos = [{"path": p, "obj": pseudo}] - """ Load Basis sets """ basis = [] for r in (p.parent).glob("**/*"): @@ -88,7 +87,7 @@ def add_data(p, elements = elements): name = f"{typ}-{name}" with open(r, "r") as fhandle: b = BasisSet.from_nwchem(fhandle, - duplicate_handling = "new", + duplicate_handling = "force-ignore", attrs = {"n_el": pseudo.n_el_tot, "name": name, "tags": tags}) diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index b12f3d8..17363e8 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -391,7 +391,10 @@ def exists(pseudo): "n_el" : None, "n_el_tot" : SYM2NUM[element] - core_electrons} - if duplicate_handling == "ignore": # simply filter duplicates + if duplicate_handling == "force-ignore": # This will be checked at the store stage + pass + + elif duplicate_handling == "ignore": # simply filter duplicates if exists(data): return [] From 79c42c4a568800a16bb43692d3c7aab6dd2af37f Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 1 Oct 2021 10:42:38 +0200 Subject: [PATCH 33/47] Update setup.json --- setup.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.json b/setup.json index b4d404b..33d94b6 100644 --- a/setup.json +++ b/setup.json @@ -43,6 +43,9 @@ "scripts": [], "reentry_register": true, "install_requires": [ + "gitpython >= 3.1.24", + "icecream >= 2.1.1", + "pydriller >= 2.0", "pydantic >= 1.8.1", "aiida-core >= 1.6.2", "cp2k-input-tools >= 0.7.3" From 9d9e05a4f5813e8a25349990021c6ab379053236 Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 1 Oct 2021 12:04:34 +0200 Subject: [PATCH 34/47] Fix naming convention for pseudo --- aiida_gaussian_datatypes/fetcher/cli.py | 1 + aiida_gaussian_datatypes/libraries.py | 3 ++- aiida_gaussian_datatypes/pseudopotential/data.py | 10 +++++++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py index c427833..a5e4c99 100644 --- a/aiida_gaussian_datatypes/fetcher/cli.py +++ b/aiida_gaussian_datatypes/fetcher/cli.py @@ -2,6 +2,7 @@ import click import tabulate +import pydriller from pathlib import Path from aiida.cmdline.utils import decorators, echo from aiida.cmdline.commands.cmd_data import verdi_data diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py index 0d4c3c1..b15d340 100644 --- a/aiida_gaussian_datatypes/libraries.py +++ b/aiida_gaussian_datatypes/libraries.py @@ -71,7 +71,8 @@ def add_data(p, elements = elements): """ Load Pseudopotential first """ with open(p, "r") as fhandle: pseudo, = Pseudopotential.from_gamess(fhandle, - duplicate_handling = "force-ignore") + duplicate_handling = "force-ignore", + attrs = {"name" : typ }) tags.append(f"q{pseudo.n_el_tot}") tags.append(f"c{pseudo.core_electrons}") pseudo.tags.extend(tags) diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index 17363e8..92085d1 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -320,7 +320,7 @@ def decimal2str(val): return [GTHPseudopotential(**p) for p in pseudos] @classmethod - def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False): + def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False, attrs = None): """ Constructs a list with pseudopotential objects from a Pseudopotential in GAMESS format @@ -339,6 +339,9 @@ def exists(pseudo): return True + if not attrs: + attrs = {} + """ Parser for Gamess format """ @@ -376,6 +379,7 @@ def exists(pseudo): """ TODO properly extract name """ + element = name.split("-")[0] lmax = int(lmax) core_electrons = int(core_electrons) @@ -391,6 +395,10 @@ def exists(pseudo): "n_el" : None, "n_el_tot" : SYM2NUM[element] - core_electrons} + if "name" in attrs: + data["aliases"].append(data["name"]) + data["name"] = attrs["name"] + if duplicate_handling == "force-ignore": # This will be checked at the store stage pass From 170aa3fbfa8cd4e3d7b04f61558550d816397f3f Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 1 Oct 2021 12:13:03 +0200 Subject: [PATCH 35/47] Fix small typo --- aiida_gaussian_datatypes/fetcher/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py index a5e4c99..ad43b08 100644 --- a/aiida_gaussian_datatypes/fetcher/cli.py +++ b/aiida_gaussian_datatypes/fetcher/cli.py @@ -153,7 +153,7 @@ def install_family(library): echo.echo("Skipping (something went wrong)") for p in o["pseudos"]: pseudo = p["obj"] - echo.echo_info(f"Adding Basis for: ", nl=False) + echo.echo_info(f"Adding Pseudopotential for: ", nl=False) echo.echo(f"{pseudo.element} ({pseudo.name})... ", nl=False) try: pseudo.store() From 7c78e9e9da651c7631491fd3e14635292e549450 Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 1 Oct 2021 13:26:41 +0200 Subject: [PATCH 36/47] Add version controling based on commit hashes --- aiida_gaussian_datatypes/libraries.py | 44 +++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py index b15d340..64b4fd3 100644 --- a/aiida_gaussian_datatypes/libraries.py +++ b/aiida_gaussian_datatypes/libraries.py @@ -9,11 +9,16 @@ import git import tempfile import pathlib +import pydriller from aiida_gaussian_datatypes import utils from typing import Dict, Generic, List, Optional, Sequence, Type, TypeVar from icecream import ic from .basisset.data import BasisSet -from .pseudopotential.data import Pseudopotential +from .pseudopotential.data import Pseudopotential, ECPPseudopotential + +from aiida.common.exceptions import ( + NotExistent, +) class LibraryBookKeeper: @@ -57,7 +62,7 @@ class QmcpackLibrary(_ExternalLibrary): def fetch(cls): elements = {} - def add_data(p, elements = elements): + def add_data(p, tempdir, elements = elements): element = str(p.parent.parent.name) if element not in utils.SYM2NUM: # Check if element is valid return @@ -73,6 +78,22 @@ def add_data(p, elements = elements): pseudo, = Pseudopotential.from_gamess(fhandle, duplicate_handling = "force-ignore", attrs = {"name" : typ }) + + commithash = "" + for commit in pydriller.Repository(str(tempdir), filepath=str(p)).traverse_commits(): + commithash = commit.hash + if commithash == "": return + pseudo.extras["commithash"] = commithash + + try: + latest = ECPPseudopotential.get(pseudo.element, + pseudo.name) + pseudo.version = latest.version + if latest.extras["commithash"] != commithash: + pseudo.version += 1 + except NotExistent: + pass + tags.append(f"q{pseudo.n_el_tot}") tags.append(f"c{pseudo.core_electrons}") pseudo.tags.extend(tags) @@ -94,9 +115,26 @@ def add_data(p, elements = elements): "tags": tags}) if len(b) == 0: continue b, = b + + commithash = "" + for commit in pydriller.Repository(str(tempdir), filepath=str(r)).traverse_commits(): + commithash = commit.hash + if commithash == "": return + b.extras["commithash"] = commithash + + try: + latest = BasisSet.get(b.element, + b.name) + b.version = latest.version + if latest.extras["commithash"] != commithash: + b.version += 1 + except NotExistent: + pass + basis.append({"path": r, "obj": b}) + if element not in elements: elements[element] = {"path": element_path, "types": {}} @@ -112,7 +150,7 @@ def add_data(p, elements = elements): for p in (tempdir/"recipes").glob("**/*"): if re.match("[A-z]{1,2}\.ccECP\.gamess", p.name): - add_data(p) + add_data(p, tempdir) return elements From 2f69d247958b12ef4c6c1984d918ca37492e98b7 Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 1 Oct 2021 13:46:35 +0200 Subject: [PATCH 37/47] Changed version controling. Now the version number is the serial number of the commit --- aiida_gaussian_datatypes/libraries.py | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py index 64b4fd3..6556270 100644 --- a/aiida_gaussian_datatypes/libraries.py +++ b/aiida_gaussian_datatypes/libraries.py @@ -78,21 +78,12 @@ def add_data(p, tempdir, elements = elements): pseudo, = Pseudopotential.from_gamess(fhandle, duplicate_handling = "force-ignore", attrs = {"name" : typ }) - commithash = "" - for commit in pydriller.Repository(str(tempdir), filepath=str(p)).traverse_commits(): + for version, commit in enumerate(pydriller.Repository(str(tempdir), filepath=str(p)).traverse_commits()): commithash = commit.hash if commithash == "": return pseudo.extras["commithash"] = commithash - - try: - latest = ECPPseudopotential.get(pseudo.element, - pseudo.name) - pseudo.version = latest.version - if latest.extras["commithash"] != commithash: - pseudo.version += 1 - except NotExistent: - pass + pseudo.attributes["version"] = version + 1 tags.append(f"q{pseudo.n_el_tot}") tags.append(f"c{pseudo.core_electrons}") @@ -117,19 +108,11 @@ def add_data(p, tempdir, elements = elements): b, = b commithash = "" - for commit in pydriller.Repository(str(tempdir), filepath=str(r)).traverse_commits(): + for version, commit in enumerate(pydriller.Repository(str(tempdir), filepath=str(r)).traverse_commits()): commithash = commit.hash if commithash == "": return b.extras["commithash"] = commithash - - try: - latest = BasisSet.get(b.element, - b.name) - b.version = latest.version - if latest.extras["commithash"] != commithash: - b.version += 1 - except NotExistent: - pass + b.attributes["version"] = version + 1 basis.append({"path": r, "obj": b}) From 2e05946f0166348d0350cbd73eb3493e66a37fea Mon Sep 17 00:00:00 2001 From: addman Date: Mon, 13 Dec 2021 21:14:40 +0100 Subject: [PATCH 38/47] Fix bad PP generation for TurboRVB, and others --- aiida_gaussian_datatypes/basisset/data.py | 28 ++++- .../pseudopotential/cli.py | 3 +- .../pseudopotential/data.py | 113 +++++++++++++++++- 3 files changed, 140 insertions(+), 4 deletions(-) diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py index 51ab2c9..27659bb 100644 --- a/aiida_gaussian_datatypes/basisset/data.py +++ b/aiida_gaussian_datatypes/basisset/data.py @@ -69,7 +69,7 @@ def _validate(self): try: # directly raises an exception for the data if something's amiss, extra fields are ignored - BasisSetData.from_dict({"identifiers": self.aliases, **self.attributes}) + # BasisSetData.from_dict({"identifiers": self.aliases, **self.attributes}) #assert isinstance(self.name, str) and self.name assert ( @@ -497,6 +497,32 @@ def to_gamess(self, fhandle): fhandle.write(f" {ii + 1:3d} {exponent:15.7f} {coefficient:15.7f}\n") offset = num + def to_gaussian(self, fhandle): + """ + Write the Basis Set to the passed file handle in the format expected by Gaussian. + + :param fhandle: A valid output file handle + """ + orb_dict = {0 : "s", + 1 : "p", + 2 : "d", + 3 : "f", + 4 : "g", + 5 : "h", + 6 : "i" } + + fhandle.write(f"# from AiiDA BasisSet\n") + for block in self.blocks: + offset = 0 + for orb, num, in block["l"]: + fhandle.write(f" {orb_dict[orb].upper()} {len(block['coefficients'])}\n") + for lnum in range(num): + for ii, entry in enumerate(block["coefficients"]): + exponent = entry[0] + coefficient = entry[1 + lnum + offset] + fhandle.write(f" {ii + 1:3d} {exponent:15.7f} {coefficient:15.7f}\n") + offset = num + def get_matching_pseudopotential(self, *args, **kwargs): """ diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py index 3bfb491..b0166d2 100644 --- a/aiida_gaussian_datatypes/pseudopotential/cli.py +++ b/aiida_gaussian_datatypes/pseudopotential/cli.py @@ -75,7 +75,7 @@ def cli(): help="filter by a tag (all tags must be present if specified multiple times)") @click.option( 'fformat', '-f', '--format', - type=click.Choice(['cp2k', 'gamess' ]), default='cp2k', + type=click.Choice(['cp2k', 'gamess', 'turborvb' ]), default='cp2k', help="the format of the pseudopotential file") @click.option( '--duplicates', @@ -99,6 +99,7 @@ def import_pseudo(pseudopotential_file, fformat, sym, tags, duplicates, ignore_i loaders = { "cp2k": Pseudopotential.from_cp2k, "gamess": Pseudopotential.from_gamess, + "turborvb": Pseudopotential.from_turborvb, } filters = { diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index 92085d1..0cc110b 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -422,6 +422,112 @@ def exists(pseudo): raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") return [ECPPseudopotential(**data)] + @classmethod + def from_turborvb(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False, attrs = None, name = None): + """ + Constructs a list with pseudopotential objects from a Pseudopotential in TurboRVB format + + :param fhandle: open file handle + :param filters: a dict with attribute filter functions + :param duplicate_handling: how to handle duplicates ("ignore", "error", "new" (version)) + :param ignore_invalid: whether to ignore invalid entries silently + :rtype: list + """ + + if hasattr(fhandle, "name"): + import re + if re.match("Z[0-9]{1,2}\_atomnumber[0-9]{1,2}\.[A-z]+", + fhandle.name): + ret = re.match("Z[0-9]{1,2}\_atomnumber([0-9]{1,2})\.[A-z]+", + fhandle.name) + atnum = int(ret.group(1)) + element = list(SYM2NUM.keys())[list(SYM2NUM.values()).index(atnum)] + name = fhandle.name + + + def exists(pseudo): + try: + cls.get(pseudo["element"], pseudo["name"], match_aliases=False) + except NotExistent: + return False + + return True + + if not attrs: + attrs = {} + + """ + Parser for TurboRVB format + """ + + functions = [] + ns = 0 + for ii, line in enumerate(fhandle): + if ii == 0: continue + if ii == 1: + num, r0, lmax = [float(x) for x in line.split()] + continue + if ii == 2: + numf = [float(x) for x in line.split()] + for jj in range(len(numf)): + functions.append({"prefactors" : [], + "polynoms" : [], + "exponents" : []}) + continue + for jj in range(len(numf)): + if numf[jj] < 1: continue + numf[jj] -= 1 + for key, value in zip(("prefactors", "polynoms", "exponents"), map(float, line.split())): + functions[jj][key].append(value) + + functions[jj]["polynoms"] = [ int(x) for x in functions[jj]["polynoms"] ] + break + + """ + TODO properly extract name + """ + + lmax = int(lmax) + + data = {"functions" : functions, + "element" : element, + "aliases" : [name], + "name" : name, + "core_electrons" : 0, + "lmax" : lmax, + "version" : 1, + "n_el" : None, + "n_el_tot" : 0} + + if "name" in attrs: + data["aliases"].append(data["name"]) + data["name"] = attrs["name"] + + if duplicate_handling == "force-ignore": # This will be checked at the store stage + pass + + elif duplicate_handling == "ignore": # simply filter duplicates + if exists(data): + return [] + + elif duplicate_handling == "error": + if exists(data): + raise UniquenessError( + f"Gaussian Pseudopotential already exists for" + f" element={data['element']}, name={data['name']}: {latest.uuid}" + ) + + elif duplicate_handling == "new": + if exists(data): + latest = cls.get(data["element"], data["name"], match_aliases=False) + data["version"] = latest.version + 1 + + else: + raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") + + pp = ECPPseudopotential(**data) + pp.set_extra("r0", r0) + return [pp] def to_cp2k(self, fhandle): """ @@ -473,8 +579,11 @@ def to_turborvb(self, fhandle): """ if isinstance(self, ECPPseudopotential): - fhandle.write(f"GEN\n") - fhandle.write(f"1 0 {self.lmax}\n") + fhandle.write(f"ECP\n") + r0 = 0.0 + if "r0" in self.extras: + r0 = self.extras["r0"] + fhandle.write(f"1 {r0:4.2f} {len(self.functions)}\n") fhandle.write(" ".join([ f"{len(x['polynoms'])}" for x in self.functions ])) fhandle.write("\n") for fun in self.functions: From bff08713deb746169da3b96ee32c740200db0b6f Mon Sep 17 00:00:00 2001 From: Otto Kohulak Date: Wed, 23 Feb 2022 16:47:43 +0100 Subject: [PATCH 39/47] Add automatic tolerance detection --- .../pseudopotential/cli.py | 6 ++-- .../pseudopotential/data.py | 32 +++++++++++++++++-- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py index b0166d2..814bb0a 100644 --- a/aiida_gaussian_datatypes/pseudopotential/cli.py +++ b/aiida_gaussian_datatypes/pseudopotential/cli.py @@ -188,9 +188,11 @@ def list_pseudo(sym, name, tags): 'gamess', 'turborvb']), default='cp2k', help="Chose the output format for the pseudopotentials: " + ', '.join(['cp2k', ])) +@click.option('-t', '--tolerance', type=str, default=1.0e-5, + help="set tolerance value for pseudo cutoff (default 1.0e-5, only for turborvb format)") @decorators.with_dbenv() # fmt: on -def dump_pseudo(sym, name, tags, output_format, data): +def dump_pseudo(sym, name, tags, output_format, data, tolerance): """ Print specified Pseudopotentials """ @@ -232,4 +234,4 @@ def dump_pseudo(sym, name, tags, output_format, data): if echo.is_stdout_redirected(): echo.echo_info("Dumping {}/{} ({})...".format(pseudo.name, pseudo.element, pseudo.uuid), err=True) - writers[output_format](pseudo, sys.stdout) + writers[output_format](pseudo, sys.stdout, tolerance = tolerance) diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index 0cc110b..7751089 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -10,6 +10,7 @@ from ..utils import SYM2NUM from decimal import Decimal from icecream import ic +import numpy as np from aiida.common.exceptions import ( MultipleObjectsError, @@ -529,7 +530,7 @@ def exists(pseudo): pp.set_extra("r0", r0) return [pp] - def to_cp2k(self, fhandle): + def to_cp2k(self, fhandle, **kwargs): """ Write this Pseudopotential instance to a file in CP2K format. @@ -549,7 +550,7 @@ def to_cp2k(self, fhandle): """ pass - def to_gamess(self, fhandle): + def to_gamess(self, fhandle, **kwargs): """ Write this Pseudopotential instance to a file in Gamess format. @@ -571,18 +572,43 @@ def to_gamess(self, fhandle): """ pass - def to_turborvb(self, fhandle): + def to_turborvb(self, fhandle, tolerance = 1.0e-5): """ Write this Pseudopotential instance to a file in TurboRVB format. :param fhandle: open file handle + :param tolerance: tolerance for pseudopotential """ + def f(r, block): + nmax = len(block) + psip = np.zeros(nmax) + fun = 0.0 + if r < 1.0e-9: r = 1.0e-9 + + for i in range(nmax): + psip[i] = np.exp(-block[i][2]*r*r + np.log(r)*block[i][1]) + + for i in range(nmax): + fun += psip[i] * block[i][0] + + return fun/r/r if isinstance(self, ECPPseudopotential): fhandle.write(f"ECP\n") r0 = 0.0 if "r0" in self.extras: r0 = self.extras["r0"] + r0s = [] + for fun in self.functions: + X = [ ii for ii in np.arange(0,10,0.01) ] + block = [ [prefactor, polynom, exponent] for prefactor, polynom, exponent in zip(*[ fun[k] for k in ("prefactors", "polynoms", "exponents")])] + Y = [ f(x, block) for x in X ] + for ii in reversed(range(len(X))): + if Y[ii] > tolerance: + r0s.append(X[ii]) + break + r0 = max(r0s) + fhandle.write(f"1 {r0:4.2f} {len(self.functions)}\n") fhandle.write(" ".join([ f"{len(x['polynoms'])}" for x in self.functions ])) fhandle.write("\n") From a7d96faf92240c9438133bb7e2fa668433682976 Mon Sep 17 00:00:00 2001 From: addman Date: Wed, 23 Feb 2022 18:53:38 +0100 Subject: [PATCH 40/47] WIP --- aiida_gaussian_datatypes/libraries.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py index 6556270..3ea4f42 100644 --- a/aiida_gaussian_datatypes/libraries.py +++ b/aiida_gaussian_datatypes/libraries.py @@ -137,3 +137,25 @@ def add_data(p, tempdir, elements = elements): return elements +@LibraryBookKeeper.register_library +class BFDLibrary(_ExternalLibrary): + + _URL = "http://burkatzki.com/pseudos/step4.2.php?format=gaussian&element={e}&basis={b}" + + @classmethod + def fetch(cls): + + from ase.data import chemical_symbols + from ase.data import atomic_numbers + from time import sleep + + list_of_basis =[ f"v{s}z" for s in "dtq56" ] + list_of_basis += [ f"{x}_ano" for x in list_of_basis ] + + for b in list_of_basis: + for ie in range(1, 87): + l = cls._URL.format(b = b, e = chemical_symbols[ie]) + to_file(urlopen(l).read(), ie, b) + """ Cool down """ + sleep(0.5) + From 7370aac40a4f2593fd984c2dea923fe5542937b8 Mon Sep 17 00:00:00 2001 From: addman Date: Thu, 3 Mar 2022 08:28:25 +0100 Subject: [PATCH 41/47] should work --- aiida_gaussian_datatypes/basisset/cli.py | 1 + aiida_gaussian_datatypes/basisset/data.py | 117 +++++++++++++++++- aiida_gaussian_datatypes/libraries.py | 57 ++++++++- .../pseudopotential/data.py | 103 +++++++++++++++ 4 files changed, 276 insertions(+), 2 deletions(-) diff --git a/aiida_gaussian_datatypes/basisset/cli.py b/aiida_gaussian_datatypes/basisset/cli.py index a299782..9256866 100644 --- a/aiida_gaussian_datatypes/basisset/cli.py +++ b/aiida_gaussian_datatypes/basisset/cli.py @@ -91,6 +91,7 @@ def import_basisset(basisset_file, fformat, sym, tags, duplicates, group): loaders = { "cp2k": BasisSet.from_cp2k, "nwchem": BasisSet.from_nwchem, + "gaussian": BasisSet.from_gaussian, } filters = { diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py index 27659bb..5b2f232 100644 --- a/aiida_gaussian_datatypes/basisset/data.py +++ b/aiida_gaussian_datatypes/basisset/data.py @@ -313,6 +313,121 @@ def decimal2str(val): return [cls(**bs) for bs in bsets] + @classmethod + def from_gaussian(cls, fhandle, filters=None, duplicate_handling="ignore", attrs = None): + """ + Constructs a list with basis set objects from a Basis Set in Gaussian format + + :param fhandle: open file handle + :param filters: a dict with attribute filter functions + :param duplicate_handling: how to handle duplicates ("ignore", "error", "new" (version)) + :rtype: list + """ + + def exists(bset): + try: + cls.get(bset["element"], bset["name"], match_aliases=False) + except NotExistent: + return False + + return True + + """ + Gaussian parser + + TODO Maybe parser should move to "parsers" + """ + + element = None + data = [] + blocks = [] + + if not attrs: + attrs = {} + + def block_creator(b, orb, blocks = blocks): + orb_dict = {"s" : 0, + "p" : 1, + "d" : 2, + "f" : 3, + "g" : 4, + "h" : 5, + "i" : 6 } + block = { "n": 0, # I dont know how to setup main quantum number + "l": [(orb_dict[orb], 1)], + "coefficients" : [ [ d["exp"], d["cont"] ] for d in b ] } + blocks.append(block) + + orb = "x" + for ii, line in enumerate(fhandle): + if ii == 1: + element = line.lower().split()[0] + continue + if re.match("^[A-z ]+[0-9\. ]*$", line): + if len(data) != 0: + block_creator(data, orb) + data = [] + orb = line.lower().split()[0] + if re.match("^[+-.0-9 ]+$", line): + exp, cont, = [ float(x) for x in line.split() ] + data.append({"exp" : exp, + "cont" : cont }) + if len(data) != 0: + block_creator(data, orb) + data = [] + + try: + basis = {"element" : element.capitalize(), + "version" : 1, + "name" : "unknown", + "tags" : [], + "aliases" : [], + "blocks" : blocks } + except: + return [] + + basis["name"] = "NA" + + if hasattr(fhandle, "name"): + basis["name"] = Path(fhandle.name).name.replace(".nwchem", "") + basis["aliases"].append(basis["name"].split(".")[-1]) + + if "name" in attrs: + basis["aliases"].append(basis["name"]) + basis["name"] = attrs["name"] + + for attr in ("n_el", "tags",): + if attr in attrs: + basis[attr] = attrs[attr] + + if len(basis["aliases"]) == 0: + del basis["aliases"] + + if duplicate_handling == "force-ignore": # It will check at the store stage + pass + + elif duplicate_handling == "ignore": # simply filter duplicates + if exists(basis): + return [] + + elif duplicate_handling == "error": + if exists(basis): + raise UniquenessError( f"Gaussian Basis Set already exists for" + f" element={basis['element']}, name={basis['name']}: {latest.uuid}") + + elif duplicate_handling == "new": + try: + latest = cls.get(basis["element"], basis["name"], match_aliases=False) + except NotExistent: + pass + else: + basis["version"] = latest.version + 1 + + else: + raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") + + return [cls(**basis)] + @classmethod def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", attrs = None): """ @@ -366,7 +481,7 @@ def block_creator(b, orb, blocks = blocks): if len(data) != 0: block_creator(data, orb) data = [] - el, orb, = line.lower().split() + el, orb = line.lower().split() if element is None: """ TODO check validity of element diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py index 3ea4f42..8e5c4a4 100644 --- a/aiida_gaussian_datatypes/libraries.py +++ b/aiida_gaussian_datatypes/libraries.py @@ -4,6 +4,7 @@ # Was there really a fish # That grants you that kind of wish # + import os import re import git @@ -147,7 +148,60 @@ def fetch(cls): from ase.data import chemical_symbols from ase.data import atomic_numbers + from urllib.request import urlopen from time import sleep + import io + + elements = {} + def add_data(source, e, b): + + source = str(source) + pat=re.compile("^.*?(" + e + "\s0.*$)",re.M|re.DOTALL) + x = pat.sub("\g<1>", source) + x = re.sub("\", "\n", x) + x = re.sub("\ ", "", x) + x = re.sub("\ ", "", x) + x = re.sub(".*html.*$", "", x) + pat=re.compile("^.*?(" + e + "\s0.*)("+e+" 0.*)$",re.M|re.DOTALL) + m = pat.match(x) + if(m): + bas = m.group(1) + ecp = m.group(2) + if len(bas) < 15: return + typ = "BFD" + pseudo, = Pseudopotential.from_gaussian(io.StringIO(ecp), + duplicate_handling = "force-ignore", + attrs = {"name" : f"{typ}" }) + + basisset, = BasisSet.from_gaussian(io.StringIO(f"\n{bas}"), + duplicate_handling = "force-ignore", + attrs = {"name" : f"{typ}-{b}" }) + pseudos = [{"path": "", + "obj": pseudo}] + version = 1 + pseudo.attributes["version"] = version + + tags = [] + tags.append(f"q{pseudo.n_el_tot}") + tags.append(f"c{pseudo.core_electrons}") + pseudo.tags.extend(tags) + + if e not in elements: + elements[e] = {"path": "", + "types": {}} + + if typ not in elements[e]["types"]: + elements[e]["types"][typ] = {"path": "", + "basis": [], + "pseudos": pseudos, + "tags": []} + elements[e]["types"][typ]["tags"].extend(tags) + elements[e]["types"][typ]["basis"].append({"path" : f"http://burkatzki.com|{b}", + "obj" : basisset}) + elements[e]["types"][typ]["tags"].append("BFD") + tt = set(elements[e]["types"][typ]["tags"]) + elements[e]["types"][typ]["tags"] = list(tt) + list_of_basis =[ f"v{s}z" for s in "dtq56" ] list_of_basis += [ f"{x}_ano" for x in list_of_basis ] @@ -155,7 +209,8 @@ def fetch(cls): for b in list_of_basis: for ie in range(1, 87): l = cls._URL.format(b = b, e = chemical_symbols[ie]) - to_file(urlopen(l).read(), ie, b) + add_data(urlopen(l).read(), chemical_symbols[ie], b) """ Cool down """ sleep(0.5) + return elements diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index 0cc110b..da8074f 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -319,6 +319,108 @@ def decimal2str(val): return [GTHPseudopotential(**p) for p in pseudos] + @classmethod + def from_gaussian(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False, attrs = None): + """ + Constructs a list with pseudopotential objects from a Pseudopotential in Gaussian format + + :param fhandle: open file handle + :param filters: a dict with attribute filter functions + :param duplicate_handling: how to handle duplicates ("ignore", "error", "new" (version)) + :param ignore_invalid: whether to ignore invalid entries silently + :rtype: list + """ + + def exists(pseudo): + try: + cls.get(pseudo["element"], pseudo["name"], match_aliases=False) + except NotExistent: + return False + + return True + + if not attrs: + attrs = {} + + """ + Parser for Gaussian format + """ + + was_comment_line = 0 + functions = [] + for ii, line in enumerate(fhandle): + if len(line.strip()) == 0: continue + if ii == 0: + element, n, = line.split() + continue + if ii == 1: + qmc, n, core_electrons, = line.split() + continue + if was_comment_line == -1: + was_comment_line = int(line.strip()) + if was_comment_line == 0: + functions.append({"prefactors" : [], + "polynoms" : [], + "exponents" : []}) + else: + was_comment_line -= 1 + functions[-1]["exponents"].append(int(line.strip()[0])) + functions[-1]["polynoms"].append(float(line.strip()[1])) + functions[-1]["prefactors"].append(float(line.strip()[2])) + + """ + Change the order of functions so they match orbital momentum + + In Gaussian format first block represents upper most lmax + and then the rest s, p, d, ... + """ + functions = functions[1:] + [functions[0]] + + """ + TODO properly extract name + """ + + lmax = len(functions) - 1 + core_electrons = int(core_electrons) + + data = {"functions" : functions, + "element" : element, + "aliases" : [qmc], + "name" : qmc, + "core_electrons" : core_electrons, + "lmax" : lmax, + "version" : 1, + "n_el" : None, + "n_el_tot" : SYM2NUM[element] - core_electrons} + + if "name" in attrs: + data["aliases"].append(data["name"]) + data["name"] = attrs["name"] + + if duplicate_handling == "force-ignore": # This will be checked at the store stage + pass + + elif duplicate_handling == "ignore": # simply filter duplicates + if exists(data): + return [] + + elif duplicate_handling == "error": + if exists(data): + raise UniquenessError( + f"Gaussian Pseudopotential already exists for" + f" element={data['element']}, name={data['name']}: {latest.uuid}" + ) + + elif duplicate_handling == "new": + if exists(data): + latest = cls.get(data["element"], data["name"], match_aliases=False) + data["version"] = latest.version + 1 + + else: + raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") + + return [ECPPseudopotential(**data)] + @classmethod def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False, attrs = None): """ @@ -422,6 +524,7 @@ def exists(pseudo): raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'") return [ECPPseudopotential(**data)] + @classmethod def from_turborvb(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False, attrs = None, name = None): """ From 04d7a0c698d90635a9c250a95ce61e1b5cbc7e55 Mon Sep 17 00:00:00 2001 From: addman Date: Thu, 3 Mar 2022 10:03:47 +0100 Subject: [PATCH 42/47] UPD cli --- aiida_gaussian_datatypes/fetcher/cli.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py index ad43b08..37fc395 100644 --- a/aiida_gaussian_datatypes/fetcher/cli.py +++ b/aiida_gaussian_datatypes/fetcher/cli.py @@ -69,13 +69,21 @@ def __new__(cls, num, element, t, p, tags, b): p = "" tags = [] + name = "" + m = re.match("http:\/\/burkatzki\.com\|([A-z]+)", b) + if m: + name = m.group(1) + m = re.match("[A-z]{1,2}\.(.+).nwchem", b) + if m: + name = m.group(1) + return ( num, element, t, p, - " ".join(tags), - re.match("[A-z]{1,2}\.(.+).nwchem", b).group(1), + " ".join(sorted(tags)), + name, b ) @@ -86,9 +94,14 @@ def __new__(cls, num, element, t, p, tags, b): continue p = d["types"][t]["pseudos"][0] for b in d["types"][t]["basis"]: - table_content.append(row(ii, e, t, p["path"].name, + name = "" + if isinstance(b["path"], str): + name = b["path"] + if hasattr(b["path"], "name"): + name = b["path"].name + table_content.append(row(ii, e, t, name, d["types"][t]["tags"], - b["path"].name)) + name)) #table_content = [row(n, p, v) for n, (p, v) in enumerate(elements.items())] return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "PseudoFile", "Tags", "Basis", "BasisFile"]) From 4d3944879b95082b5bb47d8382f9acae3928a957 Mon Sep 17 00:00:00 2001 From: addman Date: Wed, 27 Apr 2022 11:40:23 +0200 Subject: [PATCH 43/47] Clean uncontract --- aiida_gaussian_datatypes/calc/__init__.py | 0 aiida_gaussian_datatypes/calc/uncontract.py | 1 - 2 files changed, 1 deletion(-) create mode 100644 aiida_gaussian_datatypes/calc/__init__.py diff --git a/aiida_gaussian_datatypes/calc/__init__.py b/aiida_gaussian_datatypes/calc/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/aiida_gaussian_datatypes/calc/uncontract.py b/aiida_gaussian_datatypes/calc/uncontract.py index cdc7124..6bc4654 100644 --- a/aiida_gaussian_datatypes/calc/uncontract.py +++ b/aiida_gaussian_datatypes/calc/uncontract.py @@ -2,7 +2,6 @@ from aiida.plugins import DataFactory from aiida.engine import calcfunction -from icecream import ic """ """ From 316549840f00543b72ac283703853637309114a1 Mon Sep 17 00:00:00 2001 From: addman Date: Wed, 27 Apr 2022 11:56:23 +0200 Subject: [PATCH 44/47] Clean pseudopotential --- aiida_gaussian_datatypes/pseudopotential/cli.py | 2 +- .../pseudopotential/data.py | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py index 814bb0a..70ef2ce 100644 --- a/aiida_gaussian_datatypes/pseudopotential/cli.py +++ b/aiida_gaussian_datatypes/pseudopotential/cli.py @@ -188,7 +188,7 @@ def list_pseudo(sym, name, tags): 'gamess', 'turborvb']), default='cp2k', help="Chose the output format for the pseudopotentials: " + ', '.join(['cp2k', ])) -@click.option('-t', '--tolerance', type=str, default=1.0e-5, +@click.option('-r', '--tolerance', type=float, default=1.0e-5, help="set tolerance value for pseudo cutoff (default 1.0e-5, only for turborvb format)") @decorators.with_dbenv() # fmt: on diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index 42074c2..1f091da 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -347,22 +347,23 @@ def exists(pseudo): Parser for Gaussian format """ - was_comment_line = 0 + was_comment_line = 2 functions = [] + functions.append({"prefactors" : [], + "polynoms" : [], + "exponents" : []}) for ii, line in enumerate(fhandle): - if len(line.strip()) == 0: continue + ic(line.strip()) if ii == 0: element, n, = line.split() continue if ii == 1: qmc, n, core_electrons, = line.split() continue + if ii == 2: + continue if was_comment_line == -1: was_comment_line = int(line.strip()) - if was_comment_line == 0: - functions.append({"prefactors" : [], - "polynoms" : [], - "exponents" : []}) else: was_comment_line -= 1 functions[-1]["exponents"].append(int(line.strip()[0])) @@ -675,7 +676,7 @@ def to_gamess(self, fhandle, **kwargs): """ pass - def to_turborvb(self, fhandle, tolerance = 1.0e-5): + def to_turborvb(self, fhandle, tolerance = 1.0e-5, index = 1, **kwargs): """ Write this Pseudopotential instance to a file in TurboRVB format. @@ -712,7 +713,7 @@ def f(r, block): break r0 = max(r0s) - fhandle.write(f"1 {r0:4.2f} {len(self.functions)}\n") + fhandle.write(f"{index} {r0:4.2f} {len(self.functions)}\n") fhandle.write(" ".join([ f"{len(x['polynoms'])}" for x in self.functions ])) fhandle.write("\n") for fun in self.functions: From 4cd04e06f1f7966de3c3323e8bb68be1ade4703e Mon Sep 17 00:00:00 2001 From: addman Date: Fri, 29 Apr 2022 08:16:35 +0200 Subject: [PATCH 45/47] Fix bugs in BFD dowloader --- aiida_gaussian_datatypes/libraries.py | 10 ++++--- .../pseudopotential/data.py | 27 ++++++++++++------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py index 8e5c4a4..4e58933 100644 --- a/aiida_gaussian_datatypes/libraries.py +++ b/aiida_gaussian_datatypes/libraries.py @@ -13,7 +13,6 @@ import pydriller from aiida_gaussian_datatypes import utils from typing import Dict, Generic, List, Optional, Sequence, Type, TypeVar -from icecream import ic from .basisset.data import BasisSet from .pseudopotential.data import Pseudopotential, ECPPseudopotential @@ -173,9 +172,14 @@ def add_data(source, e, b): duplicate_handling = "force-ignore", attrs = {"name" : f"{typ}" }) + tags = [typ] + if "ano" in b: + tags.append("ANO") basisset, = BasisSet.from_gaussian(io.StringIO(f"\n{bas}"), duplicate_handling = "force-ignore", - attrs = {"name" : f"{typ}-{b}" }) + attrs = {"name" : f"{typ}-{b}", + "n_el" : pseudo.n_el_tot, + "tags" : tags}) pseudos = [{"path": "", "obj": pseudo}] version = 1 @@ -207,7 +211,7 @@ def add_data(source, e, b): list_of_basis += [ f"{x}_ano" for x in list_of_basis ] for b in list_of_basis: - for ie in range(1, 87): + for ie in range(1, 86): l = cls._URL.format(b = b, e = chemical_symbols[ie]) add_data(urlopen(l).read(), chemical_symbols[ie], b) """ Cool down """ diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index 1f091da..25f71e7 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -10,6 +10,7 @@ from ..utils import SYM2NUM from decimal import Decimal from icecream import ic +import re import numpy as np from aiida.common.exceptions import ( @@ -347,11 +348,8 @@ def exists(pseudo): Parser for Gaussian format """ - was_comment_line = 2 + block_counter = 0 functions = [] - functions.append({"prefactors" : [], - "polynoms" : [], - "exponents" : []}) for ii, line in enumerate(fhandle): ic(line.strip()) if ii == 0: @@ -362,13 +360,22 @@ def exists(pseudo): continue if ii == 2: continue - if was_comment_line == -1: - was_comment_line = int(line.strip()) else: - was_comment_line -= 1 - functions[-1]["exponents"].append(int(line.strip()[0])) - functions[-1]["polynoms"].append(float(line.strip()[1])) - functions[-1]["prefactors"].append(float(line.strip()[2])) + ic(block_counter) + if block_counter == 0: + if line.strip() == "": + continue + m = re.match("[ ]*([0-9])+[ ]*$", line) + if m: + block_counter = int(m.group(1)) + functions.append({"prefactors" : [], + "polynoms" : [], + "exponents" : []}) + else: + functions[-1]["polynoms"].append(int(line.strip().split()[0])) + functions[-1]["exponents"].append(float(line.strip().split()[1])) + functions[-1]["prefactors"].append(float(line.strip().split()[2])) + block_counter -= 1 """ Change the order of functions so they match orbital momentum From 24e729d59cf3f6f8cafc89e8f6b5fda61a25d2e1 Mon Sep 17 00:00:00 2001 From: Otto Kohulak Date: Tue, 8 Nov 2022 12:11:20 +0100 Subject: [PATCH 46/47] Supress warning --- aiida_gaussian_datatypes/basisset/data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py index b942cfa..77f6a52 100644 --- a/aiida_gaussian_datatypes/basisset/data.py +++ b/aiida_gaussian_datatypes/basisset/data.py @@ -330,12 +330,12 @@ def block_creator(b, orb, blocks = blocks): if ii == 1: element = line.lower().split()[0] continue - if re.match("^[A-z ]+[0-9\. ]*$", line): + if re.match(r"^[A-z ]+[0-9\. ]*$", line): if len(data) != 0: block_creator(data, orb) data = [] orb = line.lower().split()[0] - if re.match("^[+-.0-9 ]+$", line): + if re.match(r"^[+-.0-9 ]+$", line): exp, cont, = [ float(x) for x in line.split() ] data.append({"exp" : exp, "cont" : cont }) From 3f79b28123312d2e39d31c211a8bd05382c47bfa Mon Sep 17 00:00:00 2001 From: Otto Kohulak Date: Wed, 9 Nov 2022 14:27:25 +0100 Subject: [PATCH 47/47] Fix warning and errors in tests --- aiida_gaussian_datatypes/pseudopotential/data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py index e798fd1..11de26f 100644 --- a/aiida_gaussian_datatypes/pseudopotential/data.py +++ b/aiida_gaussian_datatypes/pseudopotential/data.py @@ -523,9 +523,9 @@ def from_turborvb(cls, fhandle, filters=None, duplicate_handling="ignore", ignor if hasattr(fhandle, "name"): import re - if re.match("Z[0-9]{1,2}\_atomnumber[0-9]{1,2}\.[A-z]+", + if re.match(r"Z[0-9]{1,2}\_atomnumber[0-9]{1,2}\.[A-z]+", fhandle.name): - ret = re.match("Z[0-9]{1,2}\_atomnumber([0-9]{1,2})\.[A-z]+", + ret = re.match(r"Z[0-9]{1,2}\_atomnumber([0-9]{1,2})\.[A-z]+", fhandle.name) atnum = int(ret.group(1)) element = list(SYM2NUM.keys())[list(SYM2NUM.values()).index(atnum)] @@ -888,6 +888,6 @@ def _pseudodata2dict(data: PseudopotentialData) -> Dict[str, Any]: def _dict2pseudodata(data: Dict[str, Any]) -> PseudopotentialData: - obj = {k: v for k, v in data.items() if k not in ("name", "tags", "version")} + obj = {k: v for k, v in data.items() if k not in ("name", "tags", "version", "n_el_tot")} obj["identifiers"] = obj.pop("aliases") return PseudopotentialData.parse_obj(obj)