From b716c4bea38849adb798259170b7fc27c90dd1fc Mon Sep 17 00:00:00 2001
From: Otto Kohulak <pravod@gmail.com>
Date: Fri, 3 Sep 2021 17:03:00 +0200
Subject: [PATCH 01/47] WIP

---
 aiida_gaussian_datatypes/libraries.py           | 12 ++++++++++++
 aiida_gaussian_datatypes/pseudopotential/cli.py | 11 +++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 aiida_gaussian_datatypes/libraries.py

diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
new file mode 100644
index 0000000..75c8e16
--- /dev/null
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: MIT
+
+from typing import Dict, Generic, List, Optional, Sequence, Type, TypeVar
+
+class _ExternalLibrary:
+
+    def fetch(self):
+        pass
+
+class MitasLibrary(_ExternalLibrary):
+    pass
diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py
index f5f3cc8..dd30c47 100644
--- a/aiida_gaussian_datatypes/pseudopotential/cli.py
+++ b/aiida_gaussian_datatypes/pseudopotential/cli.py
@@ -223,3 +223,14 @@ def dump_pseudo(sym, name, tags, output_format, data):
             echo.echo_info("Dumping {}/{} ({})...".format(pseudo.name, pseudo.element, pseudo.uuid), err=True)
 
         writers[output_format](pseudo, sys.stdout)
+
+# fmt: off
+@cli.command('install')
+@click.argument('pseudopotential_library', type=click.Choice(,)
+@decorators.with_dbenv()
+# fmt: on
+def install_family(pseudopotential_library):
+    """
+    Installs a family of pseudo potentials from a remote repository
+    """
+    pass

From 9ba12152fc69852134be18ee5c75d4ef60a27b1c Mon Sep 17 00:00:00 2001
From: Otto Kohulak <pravod@gmail.com>
Date: Tue, 7 Sep 2021 18:41:19 +0200
Subject: [PATCH 02/47] WIP

---
 .../pseudopotential/data.py                   | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index 691e966..8c4212d 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -8,6 +8,7 @@
 
 import dataclasses
 from decimal import Decimal
+from icecream import ic
 
 from aiida.common.exceptions import (
     MultipleObjectsError,
@@ -352,6 +353,44 @@ def decimal2str(val):
 
         return [cls(**p) for p in pseudos]
 
+    @classmethod
+    def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False):
+        """
+        Constructs a list with pseudopotential objects from a Pseudopotential in GAMESS format
+
+        :param fhandle: open file handle
+        :param filters: a dict with attribute filter functions
+        :param duplicate_handling: how to handle duplicates ("ignore", "error", "new" (version))
+        :param ignore_invalid: whether to ignore invalid entries silently
+        :rtype: list
+        """
+
+        if not filters:
+            filters = {}
+
+        pseudos = [
+            p
+            for p in (
+                dataclasses.asdict(p, dict_factory=dict_fact)
+                for p in PseudopotentialData.datafile_iter(fhandle, keep_going=ignore_invalid)
+            )
+            if matches_criteria(p)
+        ]
+
+        if duplicate_handling == "ignore":  # simply filter duplicates
+            pass
+
+        elif duplicate_handling == "error":
+            pass
+
+        elif duplicate_handling == "new":
+            pass
+
+        else:
+            raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
+
+        return []
+
     def to_cp2k(self, fhandle):
         """
         Write this Pseudopotential instance to a file in CP2K format.

From 141261ca0a79c3429063c0f4f50e50a4ef4a0d36 Mon Sep 17 00:00:00 2001
From: Otto Kohulak <pravod@gmail.com>
Date: Wed, 8 Sep 2021 16:46:38 +0200
Subject: [PATCH 03/47] WIP

---
 .../pseudopotential/data.py                   | 100 ++++++++++++++----
 setup.json                                    |   3 +-
 2 files changed, 81 insertions(+), 22 deletions(-)

diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index 8c4212d..7077faa 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -32,9 +32,6 @@ def __init__(
         aliases=None,
         tags=None,
         n_el=None,
-        local=None,
-        non_local=None,
-        nlcc=None,
         version=1,
         **kwargs,
     ):
@@ -57,18 +54,12 @@ def __init__(
         if not n_el:
             n_el = []
 
-        if not non_local:
-            non_local = []
-
-        if not nlcc:
-            nlcc = []
-
         if "label" not in kwargs:
             kwargs["label"] = name
 
         super().__init__(**kwargs)
 
-        for attr in ("name", "element", "tags", "aliases", "n_el", "local", "non_local", "nlcc", "version"):
+        for attr in ("name", "element", "tags", "aliases", "n_el", "version"):
             self.set_attribute(attr, locals()[attr])
 
     def store(self, *args, **kwargs):
@@ -351,7 +342,7 @@ def decimal2str(val):
         else:
             raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
 
-        return [cls(**p) for p in pseudos]
+        return [GTHPseudopotential(**p) for p in pseudos]
 
     @classmethod
     def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False):
@@ -365,17 +356,12 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_
         :rtype: list
         """
 
-        if not filters:
-            filters = {}
+        for ii, line in enumerate(fhandle):
+            if len(line.strip()) == 0: continue
+            if ii == 0:
+                name, gen, core_electrons, number = line.split()
+                continue
 
-        pseudos = [
-            p
-            for p in (
-                dataclasses.asdict(p, dict_factory=dict_fact)
-                for p in PseudopotentialData.datafile_iter(fhandle, keep_going=ignore_invalid)
-            )
-            if matches_criteria(p)
-        ]
 
         if duplicate_handling == "ignore":  # simply filter duplicates
             pass
@@ -416,6 +402,78 @@ def get_matching_basisset(self, *args, **kwargs):
             return BasisSet.get(element=self.element, *args, **kwargs)
 
 
+class GTHPseudopotential(Pseudopotential):
+
+    def __init__(
+        self,
+        local=None,
+        non_local=None,
+        nlcc=None,
+        **kwargs):
+        """
+        :param element: string containing the name of the element
+        :param name: identifier for this basis set, usually something like <name>-<size>[-q<nvalence>]
+        :param aliases: alternative names
+        :param tags: additional tags
+        :param n_el: number of valence electrons covered by this basis set
+        :param local: see :py:attr:`~local`
+        :param local: see :py:attr:`~non_local`
+        """
+
+        if not non_local:
+            non_local = []
+
+        if not nlcc:
+            nlcc = []
+
+        super().__init__(**kwargs)
+
+        for attr in ("local", "non_local", "nlcc"):
+            self.set_attribute(attr, locals()[attr])
+
+    @property
+    def local(self):
+        """
+        Return the local part
+
+        The format of the returned dictionary::
+
+            {
+                'r': float,
+                'coeffs': [float, float, ...],
+            }
+
+        :rtype:dict
+        """
+        return self.get_attribute("local", None)
+
+    @property
+    def non_local(self):
+        """
+        Return a list of non-local projectors (for l=0,1...).
+
+        Each list element will have the following format::
+
+            {
+                'r': float,
+                'nproj': int,
+                'coeffs': [float, float, ...],  # only the upper-triangular elements
+            }
+
+        :rtype:list
+        """
+        return self.get_attribute("non_local", [])
+
+    @property
+    def nlcc(self):
+        """
+        Return a list of the non-local core-corrections data
+
+        :rtype:list
+        """
+        return self.get_attribute("nlcc", [])
+
+
 def _dict2pseudodata(data):
     from cp2k_input_tools.pseudopotentials import (
         PseudopotentialData,
diff --git a/setup.json b/setup.json
index 381a559..7122cc7 100644
--- a/setup.json
+++ b/setup.json
@@ -22,7 +22,8 @@
     "entry_points": {
         "aiida.data": [
             "gaussian.basisset = aiida_gaussian_datatypes.basisset.data:BasisSet",
-            "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.data:Pseudopotential"
+            "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.data:Pseudopotential",
+            "gaussian.gthpseudo = aiida_gaussian_datatypes.pseudopotential.data:GTHPseudopotential"
         ],
         "aiida.cmdline.data": [
             "gaussian.basisset = aiida_gaussian_datatypes.basisset.cli:cli",

From 440796c6cf596b07c4fd52797d2a354686542f3f Mon Sep 17 00:00:00 2001
From: Otto Kohulak <pravod@gmail.com>
Date: Wed, 8 Sep 2021 19:09:57 +0200
Subject: [PATCH 04/47] WIP Gamess pseudo reader almost works

---
 .../pseudopotential/cli.py                    |  3 +-
 .../pseudopotential/data.py                   | 98 ++++++++++---------
 setup.json                                    |  3 +-
 3 files changed, 56 insertions(+), 48 deletions(-)

diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py
index f5f3cc8..e0f4602 100644
--- a/aiida_gaussian_datatypes/pseudopotential/cli.py
+++ b/aiida_gaussian_datatypes/pseudopotential/cli.py
@@ -71,7 +71,7 @@ def cli():
     help="filter by a tag (all tags must be present if specified multiple times)")
 @click.option(
     'fformat', '-f', '--format',
-    type=click.Choice(['cp2k', ]), default='cp2k',
+    type=click.Choice(['cp2k', 'gamess' ]), default='cp2k',
     help="the format of the pseudopotential file")
 @click.option(
     '--duplicates',
@@ -94,6 +94,7 @@ def import_pseudo(pseudopotential_file, fformat, sym, tags, duplicates, ignore_i
 
     loaders = {
         "cp2k": Pseudopotential.from_cp2k,
+        "gamess": Pseudopotential.from_gamess,
     }
 
     filters = {
diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index 7077faa..c6d2bc6 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -41,8 +41,6 @@ def __init__(
         :param aliases: alternative names
         :param tags: additional tags
         :param n_el: number of valence electrons covered by this basis set
-        :param local: see :py:attr:`~local`
-        :param local: see :py:attr:`~non_local`
         """
 
         if not aliases:
@@ -85,7 +83,6 @@ def _validate(self):
 
         try:
             # directly raises a ValidationError for the pseudo data if something's amiss
-            _dict2pseudodata(self.attributes)
 
             assert isinstance(self.name, str) and self.name
             assert (
@@ -152,48 +149,6 @@ def n_el(self):
 
         return self.get_attribute("n_el", [])
 
-    @property
-    def local(self):
-        """
-        Return the local part
-
-        The format of the returned dictionary::
-
-            {
-                'r': float,
-                'coeffs': [float, float, ...],
-            }
-
-        :rtype:dict
-        """
-        return self.get_attribute("local", None)
-
-    @property
-    def non_local(self):
-        """
-        Return a list of non-local projectors (for l=0,1...).
-
-        Each list element will have the following format::
-
-            {
-                'r': float,
-                'nproj': int,
-                'coeffs': [float, float, ...],  # only the upper-triangular elements
-            }
-
-        :rtype:list
-        """
-        return self.get_attribute("non_local", [])
-
-    @property
-    def nlcc(self):
-        """
-        Return a list of the non-local core-corrections data
-
-        :rtype:list
-        """
-        return self.get_attribute("nlcc", [])
-
     @classmethod
     def get(cls, element, name=None, version="latest", match_aliases=True, group_label=None, n_el=None):
         """
@@ -356,11 +311,33 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_
         :rtype: list
         """
 
+        functions = []
+        ns = 0
         for ii, line in enumerate(fhandle):
             if len(line.strip()) == 0: continue
             if ii == 0:
                 name, gen, core_electrons, number = line.split()
                 continue
+            if ns == 0:
+                ns = int(line)
+                functions.append({"prefactors" : [],
+                                  "polynoms"   : [],
+                                  "exponents"  : []})
+            else:
+                for key, value in zip(("prefactors", "polynoms", "exponents"), map(float, line.split())):
+                    functions[-1][key].append(value)
+                ns -= 1
+
+        """
+        TODO properly extract name
+        """
+        element = name.split("-")[0]
+
+        data = {"functions" : functions,
+                "element"   : element,
+                "aliases"   : [name],
+                "name"      : name,
+                "n_el"      : [1]}
 
 
         if duplicate_handling == "ignore":  # simply filter duplicates
@@ -375,7 +352,7 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_
         else:
             raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
 
-        return []
+        return [ECPPseudopotential(**data)]
 
     def to_cp2k(self, fhandle):
         """
@@ -473,6 +450,35 @@ def nlcc(self):
         """
         return self.get_attribute("nlcc", [])
 
+    def _validate(self):
+        super()._validate()
+
+        try:
+            _dict2pseudodata(self.attributes)
+        except Exception as exc:
+            raise ValidationError("One or more invalid fields found") from exc
+
+
+class ECPPseudopotential(Pseudopotential):
+
+    def __init__(
+        self,
+        functions=None,
+        lmax=1,
+        **kwargs):
+        """
+        :param functions:
+        :param lmax: maximum angular momentum
+        """
+
+        if not functions:
+            functions = []
+
+        super().__init__(**kwargs)
+
+        for attr in ("functions", "lmax"):
+            self.set_attribute(attr, locals()[attr])
+
 
 def _dict2pseudodata(data):
     from cp2k_input_tools.pseudopotentials import (
diff --git a/setup.json b/setup.json
index 7122cc7..c43e59d 100644
--- a/setup.json
+++ b/setup.json
@@ -23,7 +23,8 @@
         "aiida.data": [
             "gaussian.basisset = aiida_gaussian_datatypes.basisset.data:BasisSet",
             "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.data:Pseudopotential",
-            "gaussian.gthpseudo = aiida_gaussian_datatypes.pseudopotential.data:GTHPseudopotential"
+            "gaussian.gthpseudo = aiida_gaussian_datatypes.pseudopotential.data:GTHPseudopotential",
+            "gaussian.ecppseudo = aiida_gaussian_datatypes.pseudopotential.data:ECPPseudopotential"
         ],
         "aiida.cmdline.data": [
             "gaussian.basisset = aiida_gaussian_datatypes.basisset.cli:cli",

From a2ceabf94fee6c230da8b7053c666aa549b5da0a Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Tue, 21 Sep 2021 11:35:48 +0200
Subject: [PATCH 05/47] WIP

---
 .../pseudopotential/cli.py                    |  4 +-
 .../pseudopotential/data.py                   | 77 +++++++++++++++++--
 setup.json                                    |  4 +-
 3 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py
index e0f4602..a82d765 100644
--- a/aiida_gaussian_datatypes/pseudopotential/cli.py
+++ b/aiida_gaussian_datatypes/pseudopotential/cli.py
@@ -179,7 +179,8 @@ def list_pseudo(sym, name, tags):
               help="filter by name")
 @click.option('tags', '--tag', '-t', multiple=True,
               help="filter by a tag (all tags must be present if specified multiple times)")
-@click.option('output_format', '-f', '--format', type=click.Choice(['cp2k', ]), default='cp2k',
+@click.option('output_format', '-f', '--format', type=click.Choice(['cp2k',
+                                                                    'gamess']), default='cp2k',
               help="Chose the output format for the pseudopotentials: " + ', '.join(['cp2k', ]))
 @decorators.with_dbenv()
 # fmt: on
@@ -194,6 +195,7 @@ def dump_pseudo(sym, name, tags, output_format, data):
 
     writers = {
         "cp2k": Pseudopotential.to_cp2k,
+        "gamess": Pseudopotential.to_gamess,
     }
 
     if data:
diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index c6d2bc6..ed4a9c5 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -316,7 +316,7 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_
         for ii, line in enumerate(fhandle):
             if len(line.strip()) == 0: continue
             if ii == 0:
-                name, gen, core_electrons, number = line.split()
+                name, gen, core_electrons, lmax = line.split()
                 continue
             if ns == 0:
                 ns = int(line)
@@ -328,15 +328,25 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_
                     functions[-1][key].append(value)
                 ns -= 1
 
+                """
+                Cast polynoms to Integers
+                """
+                functions[-1]["polynoms"] = [ int(x) for x in functions[-1]["polynoms"] ]
+
         """
         TODO properly extract name
         """
         element = name.split("-")[0]
+        lmax = int(lmax)
+        core_electrons = int(core_electrons)
+
 
         data = {"functions" : functions,
                 "element"   : element,
                 "aliases"   : [name],
                 "name"      : name,
+                "core_electrons" : core_electrons,
+                "lmax"      : lmax,
                 "n_el"      : [1]}
 
 
@@ -352,7 +362,7 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_
         else:
             raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
 
-        return [ECPPseudopotential(**data)]
+        return [SMPseudopotential(**data)]
 
     def to_cp2k(self, fhandle):
         """
@@ -361,10 +371,39 @@ def to_cp2k(self, fhandle):
         :param fhandle: open file handle
         """
 
-        fhandle.write(f"# from AiiDA Pseudopotential<uuid: {self.uuid}>\n")
-        for line in _dict2pseudodata(self.attributes).cp2k_format_line_iter():
-            fhandle.write(line)
-            fhandle.write("\n")
+        if isinstance(self, GTHPseudopotential):
+
+            fhandle.write(f"# from AiiDA Pseudopotential<uuid: {self.uuid}>\n")
+            for line in _dict2pseudodata(self.attributes).cp2k_format_line_iter():
+                fhandle.write(line)
+                fhandle.write("\n")
+
+        else:
+            """
+            make an error
+            """
+            pass
+
+    def to_gamess(self, fhandle):
+        """
+        Write this Pseudopotential instance to a file in Gamess format.
+
+        :param fhandle: open file handle
+        """
+
+        if isinstance(self, SMPseudopotential):
+            fhandle.write(f"{self.name} GEN {self.core_electrons} {self.lmax}\n")
+            for fun in self.functions:
+                fhandle.write(f"{len(fun)}\n")
+                for prefactor, polynom, exponent in zip(*[ fun[k] for k in ("prefactors", "polynoms", "exponents")]):
+                    fhandle.write(f"{prefactor:10.7f} {polynom:d} {exponent:10.7f}\n")
+
+
+        else:
+            """
+            make an error
+            """
+            pass
 
     def get_matching_basisset(self, *args, **kwargs):
         """
@@ -459,12 +498,13 @@ def _validate(self):
             raise ValidationError("One or more invalid fields found") from exc
 
 
-class ECPPseudopotential(Pseudopotential):
+class SMPseudopotential(Pseudopotential):
 
     def __init__(
         self,
         functions=None,
         lmax=1,
+        core_electrons=0,
         **kwargs):
         """
         :param functions:
@@ -476,9 +516,30 @@ def __init__(
 
         super().__init__(**kwargs)
 
-        for attr in ("functions", "lmax"):
+        for attr in ("functions", "lmax", "core_electrons"):
             self.set_attribute(attr, locals()[attr])
 
+    @property
+    def lmax(self):
+        """
+        :rtype:int
+        """
+        return self.get_attribute("lmax", [])
+
+    @property
+    def core_electrons(self):
+        """
+        :rtype:int
+        """
+        return self.get_attribute("core_electrons", [])
+
+    @property
+    def functions(self):
+        """
+        :rtype:int
+        """
+        return self.get_attribute("functions", [])
+
 
 def _dict2pseudodata(data):
     from cp2k_input_tools.pseudopotentials import (
diff --git a/setup.json b/setup.json
index c43e59d..7e98820 100644
--- a/setup.json
+++ b/setup.json
@@ -23,8 +23,8 @@
         "aiida.data": [
             "gaussian.basisset = aiida_gaussian_datatypes.basisset.data:BasisSet",
             "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.data:Pseudopotential",
-            "gaussian.gthpseudo = aiida_gaussian_datatypes.pseudopotential.data:GTHPseudopotential",
-            "gaussian.ecppseudo = aiida_gaussian_datatypes.pseudopotential.data:ECPPseudopotential"
+            "gaussian.pseudo.gthpseudopotential = aiida_gaussian_datatypes.pseudopotential.data:GTHPseudopotential",
+            "gaussian.pseudo.smpseudopotential = aiida_gaussian_datatypes.pseudopotential.data:SMPseudopotential"
         ],
         "aiida.cmdline.data": [
             "gaussian.basisset = aiida_gaussian_datatypes.basisset.cli:cli",

From 9e6089b120609f71fce657fca2b520b5455e36eb Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Tue, 21 Sep 2021 11:55:49 +0200
Subject: [PATCH 06/47] Add PP type when dumping list of PPs

---
 aiida_gaussian_datatypes/pseudopotential/cli.py  | 6 ++++--
 aiida_gaussian_datatypes/pseudopotential/data.py | 6 ++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py
index a82d765..6ac0ad8 100644
--- a/aiida_gaussian_datatypes/pseudopotential/cli.py
+++ b/aiida_gaussian_datatypes/pseudopotential/cli.py
@@ -28,6 +28,7 @@ def _formatted_table_import(pseudos):
     def row(num, pseudo):
         return (
             num + 1,
+            pseudo.__name__.replace("Pseudopotential", ""),
             pseudo.element,
             _names_column(pseudo.name, pseudo.aliases),
             ", ".join(pseudo.tags),
@@ -36,7 +37,7 @@ def row(num, pseudo):
         )
 
     table_content = [row(n, p) for n, p in enumerate(pseudos)]
-    return tabulate.tabulate(table_content, headers=["Nr.", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Version"])
+    return tabulate.tabulate(table_content, headers=["Nr.", "Type", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Version"])
 
 
 def _formatted_table_list(pseudos):
@@ -45,6 +46,7 @@ def _formatted_table_list(pseudos):
     def row(pseudo):
         return (
             pseudo.uuid,
+            pseudo.__name__.replace("Pseudopotential", ""),
             pseudo.element,
             _names_column(pseudo.name, pseudo.aliases),
             ", ".join(pseudo.tags),
@@ -53,7 +55,7 @@ def row(pseudo):
         )
 
     table_content = [row(p) for p in pseudos]
-    return tabulate.tabulate(table_content, headers=["ID", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Version"])
+    return tabulate.tabulate(table_content, headers=["ID", "Type", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Version"])
 
 
 @verdi_data.group("gaussian.pseudo")
diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index ed4a9c5..ca29a89 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -25,6 +25,8 @@ class Pseudopotential(Data):
     fixme: extend to NLCC pseudos.
     """
 
+    __name__ = "Pseudopotential"
+
     def __init__(
         self,
         element=None,
@@ -420,6 +422,8 @@ def get_matching_basisset(self, *args, **kwargs):
 
 class GTHPseudopotential(Pseudopotential):
 
+    __name__ = "GTHPseudopotential"
+
     def __init__(
         self,
         local=None,
@@ -500,6 +504,8 @@ def _validate(self):
 
 class SMPseudopotential(Pseudopotential):
 
+    __name__ = "SMPseudopotential"
+
     def __init__(
         self,
         functions=None,

From d6498efcf36d0132b31f0891aa4eb58e57840027 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Wed, 22 Sep 2021 14:33:14 +0200
Subject: [PATCH 07/47] Rename new PPs again. Minor changes.

---
 .../pseudopotential/cli.py                    |  4 +-
 .../pseudopotential/data.py                   | 37 +++++++++++--------
 setup.json                                    |  2 +-
 3 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py
index 6ac0ad8..f538c3a 100644
--- a/aiida_gaussian_datatypes/pseudopotential/cli.py
+++ b/aiida_gaussian_datatypes/pseudopotential/cli.py
@@ -28,7 +28,7 @@ def _formatted_table_import(pseudos):
     def row(num, pseudo):
         return (
             num + 1,
-            pseudo.__name__.replace("Pseudopotential", ""),
+            pseudo.__name__.replace("Pseudopotential", "") if hasattr(pseudo, "__name__") else "",
             pseudo.element,
             _names_column(pseudo.name, pseudo.aliases),
             ", ".join(pseudo.tags),
@@ -46,7 +46,7 @@ def _formatted_table_list(pseudos):
     def row(pseudo):
         return (
             pseudo.uuid,
-            pseudo.__name__.replace("Pseudopotential", ""),
+            pseudo.__name__.replace("Pseudopotential", "") if hasattr(pseudo, "__name__") else "",
             pseudo.element,
             _names_column(pseudo.name, pseudo.aliases),
             ", ".join(pseudo.tags),
diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index ca29a89..9ebc7d4 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -313,6 +313,10 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_
         :rtype: list
         """
 
+        """
+        Parser for Gamess format
+        """
+
         functions = []
         ns = 0
         for ii, line in enumerate(fhandle):
@@ -343,13 +347,13 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_
         core_electrons = int(core_electrons)
 
 
-        data = {"functions" : functions,
-                "element"   : element,
-                "aliases"   : [name],
-                "name"      : name,
+        data = {"functions"      : functions,
+                "element"        : element,
+                "aliases"        : [name],
+                "name"           : name,
                 "core_electrons" : core_electrons,
-                "lmax"      : lmax,
-                "n_el"      : [1]}
+                "lmax"           : lmax,
+                "n_el"           : None}
 
 
         if duplicate_handling == "ignore":  # simply filter duplicates
@@ -364,7 +368,7 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_
         else:
             raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
 
-        return [SMPseudopotential(**data)]
+        return [ECPPseudopotential(**data)]
 
     def to_cp2k(self, fhandle):
         """
@@ -393,7 +397,7 @@ def to_gamess(self, fhandle):
         :param fhandle: open file handle
         """
 
-        if isinstance(self, SMPseudopotential):
+        if isinstance(self, ECPPseudopotential):
             fhandle.write(f"{self.name} GEN {self.core_electrons} {self.lmax}\n")
             for fun in self.functions:
                 fhandle.write(f"{len(fun)}\n")
@@ -431,11 +435,6 @@ def __init__(
         nlcc=None,
         **kwargs):
         """
-        :param element: string containing the name of the element
-        :param name: identifier for this basis set, usually something like <name>-<size>[-q<nvalence>]
-        :param aliases: alternative names
-        :param tags: additional tags
-        :param n_el: number of valence electrons covered by this basis set
         :param local: see :py:attr:`~local`
         :param local: see :py:attr:`~non_local`
         """
@@ -502,9 +501,9 @@ def _validate(self):
             raise ValidationError("One or more invalid fields found") from exc
 
 
-class SMPseudopotential(Pseudopotential):
+class ECPPseudopotential(Pseudopotential):
 
-    __name__ = "SMPseudopotential"
+    __name__ = "ECPPseudopotential"
 
     def __init__(
         self,
@@ -528,6 +527,8 @@ def __init__(
     @property
     def lmax(self):
         """
+        Return maximum angular momentum
+
         :rtype:int
         """
         return self.get_attribute("lmax", [])
@@ -535,6 +536,8 @@ def lmax(self):
     @property
     def core_electrons(self):
         """
+        Returns number of core electrons
+
         :rtype:int
         """
         return self.get_attribute("core_electrons", [])
@@ -542,7 +545,9 @@ def core_electrons(self):
     @property
     def functions(self):
         """
-        :rtype:int
+        Returns list of basis functions
+
+        :rtype:list
         """
         return self.get_attribute("functions", [])
 
diff --git a/setup.json b/setup.json
index 7e98820..6d59c57 100644
--- a/setup.json
+++ b/setup.json
@@ -24,7 +24,7 @@
             "gaussian.basisset = aiida_gaussian_datatypes.basisset.data:BasisSet",
             "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.data:Pseudopotential",
             "gaussian.pseudo.gthpseudopotential = aiida_gaussian_datatypes.pseudopotential.data:GTHPseudopotential",
-            "gaussian.pseudo.smpseudopotential = aiida_gaussian_datatypes.pseudopotential.data:SMPseudopotential"
+            "gaussian.pseudo.ecppseudopotential = aiida_gaussian_datatypes.pseudopotential.data:ECPPseudopotential"
         ],
         "aiida.cmdline.data": [
             "gaussian.basisset = aiida_gaussian_datatypes.basisset.cli:cli",

From 4691b81355960d797fa6d4deb872fef4c607ce25 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Wed, 22 Sep 2021 14:33:58 +0200
Subject: [PATCH 08/47] Fix broken tests. For unknown reasons the retrieved PPs
 came back in a different order. I do not consider this an error; therefore, I
 changed the assert comparison from maps of element-lists to maps of
 element-sets. One validation test also had to be changed.

---
 tests/test_group.py       | 10 ++++++----
 tests/test_pseudo_data.py |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/tests/test_group.py b/tests/test_group.py
index 5aceea9..afb7ab2 100644
--- a/tests/test_group.py
+++ b/tests/test_group.py
@@ -33,10 +33,11 @@ def test_pseudopotential_group_get():
     pseudogroup.add_nodes([pseudo.store() for pseudo in pseudos])
 
     retrieved_pseudos = pseudogroup.get_pseudos(elements=["Li", "H"])
+    retrieved_pseudos = {key: {x for x in val} for key, val in retrieved_pseudos.items()}
 
     assert retrieved_pseudos == {
-        "Li": [p for p in pseudos if p.element == "Li"],
-        "H": [p for p in pseudos if p.element == "H"],
+        "Li": {p for p in pseudos if p.element == "Li"},
+        "H": {p for p in pseudos if p.element == "H"},
     }
 
 
@@ -66,8 +67,9 @@ def test_pseudopotential_group_get_structure():
     structure.append_atom(position=(0.500, 0.500, 0.500), symbols="H")
 
     retrieved_pseudos = pseudogroup.get_pseudos(structure=structure)
+    retrieved_pseudos = {key: {x for x in val} for key, val in retrieved_pseudos.items()}
 
     assert retrieved_pseudos == {
-        "Li": [p for p in pseudos if p.element == "Li"],
-        "H": [p for p in pseudos if p.element == "H"],
+        "Li": {p for p in pseudos if p.element == "Li"},
+        "H": {p for p in pseudos if p.element == "H"},
     }
diff --git a/tests/test_pseudo_data.py b/tests/test_pseudo_data.py
index 43d0b72..d358911 100644
--- a/tests/test_pseudo_data.py
+++ b/tests/test_pseudo_data.py
@@ -79,7 +79,7 @@ def test_validation_empty():
 
 
 def test_validation_invalid_local():
-    Pseudo = DataFactory("gaussian.pseudo")
+    Pseudo = DataFactory("gaussian.pseudo.gthpseudopotential")
     pseudo = Pseudo(name="test", element="H", local={"r": 1.23, "coeffs": [], "something": "else"})
 
     with pytest.raises(ValidationError):

From dbb3f595e3feecb4b148bb20e559f6f9efec76eb Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Wed, 22 Sep 2021 16:14:26 +0200
Subject: [PATCH 09/47] Fix small error in Pseudopotential.get, giving support
 for other types of PPs.

---
 aiida_gaussian_datatypes/pseudopotential/data.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index 9ebc7d4..05fe900 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -171,7 +171,7 @@ def get(cls, element, name=None, version="latest", match_aliases=True, group_lab
             query.append(Group, filters={"label": group_label}, tag="group")
             params["with_group"] = "group"
 
-        query.append(Pseudopotential, **params)
+        query.append(cls, **params)
 
         filters = {"attributes.element": {"==": element}}
 
@@ -184,7 +184,7 @@ def get(cls, element, name=None, version="latest", match_aliases=True, group_lab
             else:
                 filters["attributes.name"] = {"==": name}
 
-        query.add_filter(Pseudopotential, filters)
+        query.add_filter(cls, filters)
 
         # SQLA ORM only solution:
         # query.order_by({Pseudopotential: [{"attributes.version": {"cast": "i", "order": "desc"}}]})
@@ -355,7 +355,6 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_
                 "lmax"           : lmax,
                 "n_el"           : None}
 
-
         if duplicate_handling == "ignore":  # simply filter duplicates
             pass
 

From 0fa709011018a7a72ca8734e963ae894e7581b04 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Wed, 22 Sep 2021 16:29:21 +0200
Subject: [PATCH 10/47] Add duplicate handling for ECPs

---
 .../pseudopotential/data.py                   | 22 ++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index 05fe900..c48bf6c 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -313,6 +313,14 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_
         :rtype: list
         """
 
+        def exists(pseudo):
+            try:
+                cls.get(pseudo["element"], pseudo["name"], match_aliases=False)
+            except NotExistent:
+                return False
+
+            return True
+
         """
         Parser for Gamess format
         """
@@ -353,16 +361,24 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_
                 "name"           : name,
                 "core_electrons" : core_electrons,
                 "lmax"           : lmax,
+                "version"        : 1,
                 "n_el"           : None}
 
         if duplicate_handling == "ignore":  # simply filter duplicates
-            pass
+            if exists(data):
+                return []
 
         elif duplicate_handling == "error":
-            pass
+            if exists(data):
+                raise UniquenessError(
+                    f"Gaussian Pseudopotential already exists for"
+                    f" element={data['element']}, name={data['name']}: {latest.uuid}"
+                )
 
         elif duplicate_handling == "new":
-            pass
+            if exists(data):
+                latest = cls.get(data["element"], data["name"], match_aliases=False)
+                data["version"] = latest.version + 1
 
         else:
             raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")

From 2a5d0031fcd162f936c2ed27d995e226be7380dc Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Wed, 22 Sep 2021 16:58:58 +0200
Subject: [PATCH 11/47] Add first from_gamess tester

---
 tests/GAMESS_ECP.B        |  7 +++++++
 tests/test_pseudo_data.py | 15 +++++++++++++++
 2 files changed, 22 insertions(+)
 create mode 100644 tests/GAMESS_ECP.B

diff --git a/tests/GAMESS_ECP.B b/tests/GAMESS_ECP.B
new file mode 100644
index 0000000..9ab1f03
--- /dev/null
+++ b/tests/GAMESS_ECP.B
@@ -0,0 +1,7 @@
+B-ccECP GEN 2 1
+3
+ 1.00000   1 30.0000
+ 100.000   3 22.0000
+-1.00000   2  5.0000
+1
+ 20.0000   2  4.0000
diff --git a/tests/test_pseudo_data.py b/tests/test_pseudo_data.py
index d358911..ddad6e4 100644
--- a/tests/test_pseudo_data.py
+++ b/tests/test_pseudo_data.py
@@ -94,3 +94,18 @@ def test_get_matching_empty():
 
     with pytest.raises(NotExistent):
         pseudos[0].get_matching_basisset()
+
+
+def test_import_from_gamess():
+    Pseudopotential = DataFactory("gaussian.pseudo")
+
+    with open(TEST_DIR.joinpath("GAMESS_ECP.B"), "r") as fhandle:
+        # get only the He PADE pseudo
+        pseudos = Pseudopotential.from_gamess( fhandle )
+
+    assert len(pseudos) == 1
+
+    pseudos[0].store()
+
+    # check that the name is used for the node label
+    assert pseudos[0].label == pseudos[0].name

From e156d63ac3a9dc03c58b7554678bbb300f6ef80b Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Thu, 23 Sep 2021 17:53:56 +0200
Subject: [PATCH 12/47] WIP

---
 aiida_gaussian_datatypes/libraries.py         | 60 ++++++++++++++++++-
 .../pseudopotential/cli.py                    | 11 ----
 setup.json                                    |  3 +-
 3 files changed, 60 insertions(+), 14 deletions(-)

diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
index 75c8e16..482097a 100644
--- a/aiida_gaussian_datatypes/libraries.py
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -1,12 +1,68 @@
 # -*- coding: utf-8 -*-
 # SPDX-License-Identifier: MIT
+#
+# Was there really a fish
+# That grants you that kind of wish
+#
 
+import os
+import re
+import git
+import tempfile
+import pathlib
 from typing import Dict, Generic, List, Optional, Sequence, Type, TypeVar
+from icecream import ic
+
+class LibraryBookKeeper:
+
+    classes = []
+
+    @classmethod
+    def register_library(cls, cls_):
+        cls.classes.append(cls_)
+
+    @classmethod
+    def get_libraries(cls):
+        return cls.classes
+
+    @classmethod
+    def get_library_names(cls):
+        return [ re.match("<class '[0-9A-z_\.]*\.([A-z]+)'>", str(x)).group(1) for x in cls.classes ]
+
+    @classmethod
+    def get_library_by_name(cls, name):
+        for cls_ in cls.get_libraries():
+            if re.match(f"<class '[0-9A-z_\.]*\.({name})'>", str(cls_)) is not None:
+                return cls_
+        return None
 
 class _ExternalLibrary:
 
-    def fetch(self):
+    @classmethod
+    def fetch(cls):
         pass
 
-class MitasLibrary(_ExternalLibrary):
+@LibraryBookKeeper.register_library
+class EmptyLibrary(_ExternalLibrary):
     pass
+
+@LibraryBookKeeper.register_library
+class MitasLibrary(_ExternalLibrary):
+
+    _URL = "https://github.com/QMCPACK/pseudopotentiallibrary.git"
+
+    @classmethod
+    def fetch(cls):
+        tempdir = pathlib.Path(tempfile.mkdtemp())
+        git.Repo.clone_from(cls._URL, tempdir)
+        elements = { str(sub.name): {"file" : sub} for sub in (tempdir/"recipes").iterdir() if sub.is_dir() }
+        # Add types
+        elements = {el: {**data,
+                         "types" : {x.name: {"path": x,
+                                             "basis": [ b for b in x.iterdir() if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.gamess", b.name)],
+                                             "pseudo": [ b for b in x.iterdir() if re.match("[A-z]{1,2}\.ccECP\.gamess", b.name)]} for x in data["file"].iterdir() if x.is_dir()}} for el, data in elements.items()}
+        return elements
+
+
+
+
diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py
index 94be183..f538c3a 100644
--- a/aiida_gaussian_datatypes/pseudopotential/cli.py
+++ b/aiida_gaussian_datatypes/pseudopotential/cli.py
@@ -228,14 +228,3 @@ def dump_pseudo(sym, name, tags, output_format, data):
             echo.echo_info("Dumping {}/{} ({})...".format(pseudo.name, pseudo.element, pseudo.uuid), err=True)
 
         writers[output_format](pseudo, sys.stdout)
-
-# fmt: off
-@cli.command('install')
-@click.argument('pseudopotential_library', type=click.Choice(,)
-@decorators.with_dbenv()
-# fmt: on
-def install_family(pseudopotential_library):
-    """
-    Installs a family of pseudo potentials from a remote repository
-    """
-    pass
diff --git a/setup.json b/setup.json
index 6d59c57..6a20664 100644
--- a/setup.json
+++ b/setup.json
@@ -28,7 +28,8 @@
         ],
         "aiida.cmdline.data": [
             "gaussian.basisset = aiida_gaussian_datatypes.basisset.cli:cli",
-            "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.cli:cli"
+            "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.cli:cli",
+            "gaussian = aiida_gaussian_datatypes.fetcher.cli:cli"
         ],
         "aiida.groups": [
             "gaussian.basisset = aiida_gaussian_datatypes.groups:BasisSetGroup",

From cdcbfddfe906f27bd780d407ea3c860e0b9a8a37 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 24 Sep 2021 12:32:44 +0200
Subject: [PATCH 13/47] Get rid of the ridiculous one-liner in libraries.py

---
 aiida_gaussian_datatypes/libraries.py | 51 +++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 6 deletions(-)

diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
index 482097a..ec50ae6 100644
--- a/aiida_gaussian_datatypes/libraries.py
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -10,6 +10,7 @@
 import git
 import tempfile
 import pathlib
+from aiida_gaussian_datatypes import utils
 from typing import Dict, Generic, List, Optional, Sequence, Type, TypeVar
 from icecream import ic
 
@@ -53,15 +54,53 @@ class MitasLibrary(_ExternalLibrary):
 
     @classmethod
     def fetch(cls):
+
+        elements = {}
+        def add_row(p, elements = elements):
+            element = str(p.parent.parent.name)
+            if element not in utils.SYM2NUM: # Check if element is valid
+                return
+            element_path = p.parent.parent
+
+            typ = str(p.parent.name)
+            typ_path = str(p.parent.name)
+
+            if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.gamess", p.name):
+                nature = "basis"
+            elif re.match("[A-z]{1,2}\.ccECP\.gamess", p.name):
+                nature = "pseudos"
+            else:
+                """
+                If does not match these regexes do nothing
+                """
+                return
+
+            if element not in elements:
+                elements[element] = {"path": element_path,
+                                     "types": {}}
+
+            if typ not in elements[element]["types"]:
+                elements[element]["types"][typ] = {"path": typ_path,
+                                                   "basis": [],
+                                                   "pseudos": []}
+
+            elements[element]["types"][typ][nature].append(p)
+
+
         tempdir = pathlib.Path(tempfile.mkdtemp())
         git.Repo.clone_from(cls._URL, tempdir)
-        elements = { str(sub.name): {"file" : sub} for sub in (tempdir/"recipes").iterdir() if sub.is_dir() }
-        # Add types
-        elements = {el: {**data,
-                         "types" : {x.name: {"path": x,
-                                             "basis": [ b for b in x.iterdir() if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.gamess", b.name)],
-                                             "pseudo": [ b for b in x.iterdir() if re.match("[A-z]{1,2}\.ccECP\.gamess", b.name)]} for x in data["file"].iterdir() if x.is_dir()}} for el, data in elements.items()}
+
+        for p in (tempdir/"recipes").glob("**/*"):
+            if str(p.name).lower().endswith(".gamess"):
+                add_row(p)
+
         return elements
+#                        elements = {el: {**data,
+#-                         "types" : {x.name: {"path": x,
+#-                                             "basis": [ b for b in x.iterdir() if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.gamess", b.name)],
+#-                                             "pseudo": [ b for b in x.iterdir() if re.match(, b.name)]} for x in data["file"].iterdir() if x.is_dir()}} for el, data in elements.items()}
+#-        return elements
+
 
 
 

From 63425d08db90bc027a19952cbf138f553073e120 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 24 Sep 2021 13:42:11 +0200
Subject: [PATCH 14/47] Add different formatter for import tables

---
 aiida_gaussian_datatypes/fetcher/cli.py | 89 +++++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 aiida_gaussian_datatypes/fetcher/cli.py

diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py
new file mode 100644
index 0000000..07c62bc
--- /dev/null
+++ b/aiida_gaussian_datatypes/fetcher/cli.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+
+import click
+import tabulate
+from aiida.cmdline.utils import decorators, echo
+from aiida.cmdline.commands.cmd_data import verdi_data
+from ..libraries import *
+
+def _formatted_table_import(elements):
+    """generates a formatted table (using tabulate) for importable basis and PPs"""
+
+    def _boldformater(f):
+
+        def fout(*args, **kwargs):
+            if args[1] % 2 == 1:
+                return ( f"\033[1m{x}\033[0m" for x in f(*args, **kwargs))
+            else:
+                return ( x for x in f(*args, **kwargs))
+        return fout
+
+    class row():
+
+        num = []
+        element = []
+        t = []
+
+        @_boldformater
+        def __new__(cls, num, element, t, p, b):
+
+            if element in cls.element:
+                element = ""
+            else:
+                cls.element.append(element)
+                element = str(element)
+                cls.t = []
+
+            if num in cls.num:
+                num = ""
+            else:
+                cls.num.append(num)
+                num = str(num)
+
+            if t in cls.t:
+                t = ""
+            else:
+                cls.t.append(t)
+
+            if t == "":
+                p = ""
+
+            return (
+                num,
+                element,
+                t,
+                p,
+                b
+            )
+
+    table_content = []
+    for ii, (e, d) in enumerate(elements.items()):
+        for t in d["types"]:
+            if len(d["types"][t]["pseudos"]) == 0:
+                continue
+            p = d["types"][t]["pseudos"][0]
+            for b in d["types"][t]["basis"]:
+                table_content.append(row(ii, e, t, p.name, b.name))
+
+    #table_content = [row(n, p, v) for n, (p, v) in enumerate(elements.items())]
+    return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "Pseudo", "Basis"])
+
+@verdi_data.group("gaussian")
+def cli():
+    """Manage Pseudopotentials for GTO-based codes"""
+
+# fmt: off
+@cli.command('fetch')
+@click.argument('library',
+                type=click.Choice(LibraryBookKeeper.get_library_names()))
+@decorators.with_dbenv()
+# fmt: on
+def install_family(library):
+    """
+    Installs a family of pseudo potentials from a remote repository
+    """
+    elements = LibraryBookKeeper.get_library_by_name(library).fetch()
+    echo.echo_info(f"Found {len(elements)} elements")
+    echo.echo(_formatted_table_import(elements))
+
+

From 76f9358259ba2f8ef97eccb64615f02fea75b05e Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 24 Sep 2021 14:23:04 +0200
Subject: [PATCH 15/47] WIP

---
 aiida_gaussian_datatypes/basisset/cli.py  |  4 +-
 aiida_gaussian_datatypes/basisset/data.py | 53 ++++++++++++++++++++++-
 2 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/aiida_gaussian_datatypes/basisset/cli.py b/aiida_gaussian_datatypes/basisset/cli.py
index 3615ef7..45fa3d2 100644
--- a/aiida_gaussian_datatypes/basisset/cli.py
+++ b/aiida_gaussian_datatypes/basisset/cli.py
@@ -70,7 +70,8 @@ def cli():
     multiple=True,
     help="filter by a tag (all tags must be present if specified multiple times)")
 @click.option(
-    'fformat', '-f', '--format', type=click.Choice(['cp2k']), default='cp2k',
+    'fformat', '-f', '--format', type=click.Choice(['cp2k',
+                                                    'gamess']), default='cp2k',
     help="the format of the basis set file")
 @click.option(
     '--duplicates',
@@ -89,6 +90,7 @@ def import_basisset(basisset_file, fformat, sym, tags, duplicates, group):
 
     loaders = {
         "cp2k": BasisSet.from_cp2k,
+        "gamess": BasisSet.from_gamess,
     }
 
     filters = {
diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py
index 72c9622..7ea263b 100644
--- a/aiida_gaussian_datatypes/basisset/data.py
+++ b/aiida_gaussian_datatypes/basisset/data.py
@@ -236,7 +236,7 @@ def get(cls, element, name=None, version="latest", match_aliases=True, group_lab
         return items[0][0]
 
     @classmethod
-    def from_cp2k(cls, fhandle, filters=None, duplicate_handling="ignore"):
+    def from_cp2k(cls, fhandle, filters=None, duplicate_handling="ignore", element = None):
         """
         Constructs a list with basis set objects from a Basis Set in CP2K format
 
@@ -324,6 +324,57 @@ def decimal2str(val):
 
         return [cls(**bs) for bs in bsets]
 
+    @classmethod
+    def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", element = None):
+        """
+        Constructs a list with basis set objects from a Basis Set in GAMESS format
+
+        :param fhandle: open file handle
+        :param filters: a dict with attribute filter functions
+        :param duplicate_handling: how to handle duplicates ("ignore", "error", "new" (version))
+        :rtype: list
+        """
+
+        if not element:
+            raise ValueError(f"Element has to be set!")
+
+
+        """
+        GAMESS parser
+        """
+
+        if duplicate_handling == "ignore":  # simply filter duplicates
+            #bsets = [bs for bs in bsets if not exists(bs)]
+            pass
+
+        elif duplicate_handling == "error":
+            #for bset in bsets:
+            #    try:
+            #        latest = cls.get(bset["element"], bset["name"], match_aliases=False)
+            #    except NotExistent:
+            #        pass
+            #    else:
+            #        raise UniquenessError(
+            #            f"Gaussian Basis Set already exists for"
+            #            f" element={bset['element']}, name={bset['name']}: {latest.uuid}"
+            #        )
+            pass
+
+        elif duplicate_handling == "new":
+            #for bset in bsets:
+            #    try:
+            #        latest = cls.get(bset["element"], bset["name"], match_aliases=False)
+            #    except NotExistent:
+            #        pass
+            #    else:
+            #        bset["version"] = latest.version + 1
+            pass
+
+        else:
+            raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
+
+        return []
+
     def to_cp2k(self, fhandle):
         """
         Write the Basis Set to the passed file handle in the format expected by CP2K.

From f3ee4c0bec157397aaea98d56b857517d81eba78 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 24 Sep 2021 15:18:10 +0200
Subject: [PATCH 16/47] Change importer to import basis from nwchem. The
 problem with the gamess format is that it does not store the element symbol.

---
 aiida_gaussian_datatypes/fetcher/cli.py |  3 ++-
 aiida_gaussian_datatypes/libraries.py   | 13 ++-----------
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py
index 07c62bc..dfc717b 100644
--- a/aiida_gaussian_datatypes/fetcher/cli.py
+++ b/aiida_gaussian_datatypes/fetcher/cli.py
@@ -53,6 +53,7 @@ def __new__(cls, num, element, t, p, b):
                 element,
                 t,
                 p,
+                re.match("[A-z]{1,2}\.(.+).nwchem", b).group(1),
                 b
             )
 
@@ -66,7 +67,7 @@ def __new__(cls, num, element, t, p, b):
                 table_content.append(row(ii, e, t, p.name, b.name))
 
     #table_content = [row(n, p, v) for n, (p, v) in enumerate(elements.items())]
-    return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "Pseudo", "Basis"])
+    return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "PseudoFile", "Basis", "BasisFile"])
 
 @verdi_data.group("gaussian")
 def cli():
diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
index ec50ae6..039ee43 100644
--- a/aiida_gaussian_datatypes/libraries.py
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -65,7 +65,7 @@ def add_row(p, elements = elements):
             typ = str(p.parent.name)
             typ_path = str(p.parent.name)
 
-            if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.gamess", p.name):
+            if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.nwchem", p.name):
                 nature = "basis"
             elif re.match("[A-z]{1,2}\.ccECP\.gamess", p.name):
                 nature = "pseudos"
@@ -91,17 +91,8 @@ def add_row(p, elements = elements):
         git.Repo.clone_from(cls._URL, tempdir)
 
         for p in (tempdir/"recipes").glob("**/*"):
-            if str(p.name).lower().endswith(".gamess"):
+            if str(p.name).lower().endswith(".gamess") or str(p.name).lower().endswith(".nwchem"):
                 add_row(p)
 
         return elements
-#                        elements = {el: {**data,
-#-                         "types" : {x.name: {"path": x,
-#-                                             "basis": [ b for b in x.iterdir() if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.gamess", b.name)],
-#-                                             "pseudo": [ b for b in x.iterdir() if re.match(, b.name)]} for x in data["file"].iterdir() if x.is_dir()}} for el, data in elements.items()}
-#-        return elements
-
-
-
-
 

From 04cffdfc51f1c331c85912db9cc4044aea758267 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 24 Sep 2021 18:02:19 +0200
Subject: [PATCH 17/47] WIP

---
 aiida_gaussian_datatypes/basisset/cli.py  | 4 ++--
 aiida_gaussian_datatypes/basisset/data.py | 8 +++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/aiida_gaussian_datatypes/basisset/cli.py b/aiida_gaussian_datatypes/basisset/cli.py
index 45fa3d2..7d1c91c 100644
--- a/aiida_gaussian_datatypes/basisset/cli.py
+++ b/aiida_gaussian_datatypes/basisset/cli.py
@@ -71,7 +71,7 @@ def cli():
     help="filter by a tag (all tags must be present if specified multiple times)")
 @click.option(
     'fformat', '-f', '--format', type=click.Choice(['cp2k',
-                                                    'gamess']), default='cp2k',
+                                                    'nwchem']), default='cp2k',
     help="the format of the basis set file")
 @click.option(
     '--duplicates',
@@ -90,7 +90,7 @@ def import_basisset(basisset_file, fformat, sym, tags, duplicates, group):
 
     loaders = {
         "cp2k": BasisSet.from_cp2k,
-        "gamess": BasisSet.from_gamess,
+        "nwchem": BasisSet.from_nwchem,
     }
 
     filters = {
diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py
index 7ea263b..9b907b6 100644
--- a/aiida_gaussian_datatypes/basisset/data.py
+++ b/aiida_gaussian_datatypes/basisset/data.py
@@ -325,9 +325,9 @@ def decimal2str(val):
         return [cls(**bs) for bs in bsets]
 
     @classmethod
-    def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", element = None):
+    def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", element = None):
         """
-        Constructs a list with basis set objects from a Basis Set in GAMESS format
+        Constructs a list with basis set objects from a Basis Set in NWCHEM format
 
         :param fhandle: open file handle
         :param filters: a dict with attribute filter functions
@@ -340,8 +340,10 @@ def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", element
 
 
         """
-        GAMESS parser
+        NWCHEM parser
         """
+        for line in fhande:
+
 
         if duplicate_handling == "ignore":  # simply filter duplicates
             #bsets = [bs for bs in bsets if not exists(bs)]

From 506664f47f13b8cdcfff7e3a096f05455e919af5 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 24 Sep 2021 18:03:23 +0200
Subject: [PATCH 18/47] Forgot to add __init__.py

---
 aiida_gaussian_datatypes/fetcher/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 aiida_gaussian_datatypes/fetcher/__init__.py

diff --git a/aiida_gaussian_datatypes/fetcher/__init__.py b/aiida_gaussian_datatypes/fetcher/__init__.py
new file mode 100644
index 0000000..e69de29

From 188fe1de416ecee71e9a00f2d786ec6cd2b9820b Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Sun, 26 Sep 2021 00:15:49 +0200
Subject: [PATCH 19/47] Add nwchem loader for basis

---
 aiida_gaussian_datatypes/basisset/data.py | 70 +++++++++++++++++++----
 1 file changed, 60 insertions(+), 10 deletions(-)

diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py
index 9b907b6..9f87a6e 100644
--- a/aiida_gaussian_datatypes/basisset/data.py
+++ b/aiida_gaussian_datatypes/basisset/data.py
@@ -14,8 +14,11 @@
     NotExistent,
     UniquenessError,
     ValidationError,
+    ParsingError
 )
+import re
 from aiida.orm import Data, Group
+from icecream import ic
 
 
 class BasisSet(Data):
@@ -82,14 +85,15 @@ def _validate(self):
             # directly raises an exception for the data if something's amiss, extra fields are ignored
             BasisSetData.from_dict({"identifiers": self.aliases, **self.attributes})
 
-            assert isinstance(self.name, str) and self.name
+            #assert isinstance(self.name, str) and self.name
+            ic(self.aliases)
             assert (
                 isinstance(self.aliases, list)
                 and all(isinstance(alias, str) for alias in self.aliases)
                 and self.aliases
             )
-            assert isinstance(self.tags, list) and all(isinstance(tag, str) for tag in self.tags)
-            assert isinstance(self.version, int) and self.version > 0
+            #assert isinstance(self.tags, list) and all(isinstance(tag, str) for tag in self.tags)
+            #assert isinstance(self.version, int) and self.version > 0
         except Exception as exc:
             raise ValidationError("One or more invalid fields found") from exc
 
@@ -236,7 +240,7 @@ def get(cls, element, name=None, version="latest", match_aliases=True, group_lab
         return items[0][0]
 
     @classmethod
-    def from_cp2k(cls, fhandle, filters=None, duplicate_handling="ignore", element = None):
+    def from_cp2k(cls, fhandle, filters=None, duplicate_handling="ignore"):
         """
         Constructs a list with basis set objects from a Basis Set in CP2K format
 
@@ -325,7 +329,7 @@ def decimal2str(val):
         return [cls(**bs) for bs in bsets]
 
     @classmethod
-    def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", element = None):
+    def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore"):
         """
         Constructs a list with basis set objects from a Basis Set in NWCHEM format
 
@@ -335,15 +339,51 @@ def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", element
         :rtype: list
         """
 
-        if not element:
-            raise ValueError(f"Element has to be set!")
-
 
         """
         NWCHEM parser
         """
-        for line in fhande:
 
+        element = None
+        data = []
+        blocks = []
+
+        def block_creator(b, orb, blocks = blocks):
+            orb_dict = {"s" : 0,
+                        "p" : 1,
+                        "d" : 2,
+                        "f" : 3,
+                        "g" : 4,
+                        "h" : 5,
+                        "i" : 6 }
+            block = { "n": 0, # I dont know how to setup main quantum number
+                      "l": [(orb_dict[orb], len(data))],
+                      "coefficients" : [ [ d["exp"], d["cont"] ] for d in b ] }
+            blocks.append(block)
+
+        for line in fhandle:
+            """
+            Element symbol has to be every block
+            """
+            if re.match("^[A-z ]+$", line):
+                if len(data) != 0:
+                    block_creator(data, orb)
+                    data = []
+                el, orb, = line.lower().split()
+                if element is None:
+                    """
+                    TODO check validity of element
+                    """
+                    element = el
+                elif element != el:
+                    raise ParsingError(f"Element previous {element}, and now {el}.") # Element cannot be changed
+            if re.match("^[+-.0-9 ]+$", line):
+                exp, cont, = [ float(x) for x in line.split() ]
+                data.append({"exp" : exp,
+                             "cont" : cont })
+        if len(data) != 0:
+            block_creator(data, orb)
+            data = []
 
         if duplicate_handling == "ignore":  # simply filter duplicates
             #bsets = [bs for bs in bsets if not exists(bs)]
@@ -375,7 +415,17 @@ def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", element
         else:
             raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
 
-        return []
+        basis = {"element" : element.capitalize(),
+                 "version" : 1,
+                 "tags" : [],
+                 "aliases" : ["nwchem"],
+                 "blocks" : blocks }
+
+        if hasattr(fhandle, "name"):
+            basis["name"] = fhandle.name
+            basis["aliases"].append(fhandle.name.replace(".nwchem", ""))
+
+        return [cls(**basis)]
 
     def to_cp2k(self, fhandle):
         """

From 24b4c55cc13f02752303e34c8795a16a58e617c5 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Sun, 26 Sep 2021 00:46:54 +0200
Subject: [PATCH 20/47] WIP

---
 aiida_gaussian_datatypes/basisset/data.py |  1 -
 aiida_gaussian_datatypes/fetcher/cli.py   | 30 +++++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py
index 9f87a6e..2779c00 100644
--- a/aiida_gaussian_datatypes/basisset/data.py
+++ b/aiida_gaussian_datatypes/basisset/data.py
@@ -86,7 +86,6 @@ def _validate(self):
             BasisSetData.from_dict({"identifiers": self.aliases, **self.attributes})
 
             #assert isinstance(self.name, str) and self.name
-            ic(self.aliases)
             assert (
                 isinstance(self.aliases, list)
                 and all(isinstance(alias, str) for alias in self.aliases)
diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py
index dfc717b..521848e 100644
--- a/aiida_gaussian_datatypes/fetcher/cli.py
+++ b/aiida_gaussian_datatypes/fetcher/cli.py
@@ -5,6 +5,10 @@
 from aiida.cmdline.utils import decorators, echo
 from aiida.cmdline.commands.cmd_data import verdi_data
 from ..libraries import *
+from ..basisset.data import BasisSet
+from ..pseudopotential.data import Pseudopotential
+
+from ..utils import click_parse_range  # pylint: disable=relative-beyond-top-level
 
 def _formatted_table_import(elements):
     """generates a formatted table (using tabulate) for importable basis and PPs"""
@@ -86,5 +90,31 @@ def install_family(library):
     elements = LibraryBookKeeper.get_library_by_name(library).fetch()
     echo.echo_info(f"Found {len(elements)} elements")
     echo.echo(_formatted_table_import(elements))
+    echo.echo("")
+    indexes = click.prompt(
+        "Which Elements do you want to add?"
+        " ('n' for none, 'a' for all, comma-seperated list or range of numbers)",
+        value_proc=lambda v: click_parse_range(v, len(elements)))
+    ic(elements)
+    ic(indexes)
+    for idx in indexes:
+        e = elements[{1: "H"}[idx+1]]
+        for t, o in e["types"].items():
+            for b in o["basis"]:
+                with open(str(b), "r") as fhandle:
+                    basis, = BasisSet.from_nwchem(fhandle)
+                    echo.echo(f"Adding Basis for: {basis.element} ({basis.name})...  ", nl=False)
+                    echo.echo("DONE")
+            for p in o["pseudos"]:
+                with open(str(p), "r") as fhandle:
+                    pseudo, = Pseudopotential.from_gamess(fhandle)
+                    echo.echo(f"Adding Pseudo for: {pseudo.element} ({pseudo.name})... ", nl=False)
+                    echo.echo("DONE")
+
+
+    #    echo.echo_info(
+    #        "Adding Objects for: {p.element} ({p.name})... ".format(p=pseudos[idx]), nl=False)
+    #    pseudos[idx].store()
+    #    echo.echo("DONE")
 
 

From f3e89c523ce37cb90a72dba8274bbba549eb92aa Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Mon, 27 Sep 2021 16:36:27 +0200
Subject: [PATCH 21/47] WIP

---
 aiida_gaussian_datatypes/basisset/data.py |  14 +--
 aiida_gaussian_datatypes/fetcher/cli.py   | 101 +++++++++++++++++-----
 aiida_gaussian_datatypes/libraries.py     |   3 +-
 3 files changed, 89 insertions(+), 29 deletions(-)

diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py
index 2779c00..aa59b8c 100644
--- a/aiida_gaussian_datatypes/basisset/data.py
+++ b/aiida_gaussian_datatypes/basisset/data.py
@@ -18,6 +18,7 @@
 )
 import re
 from aiida.orm import Data, Group
+from pathlib import Path
 from icecream import ic
 
 
@@ -328,7 +329,7 @@ def decimal2str(val):
         return [cls(**bs) for bs in bsets]
 
     @classmethod
-    def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore"):
+    def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", name = None):
         """
         Constructs a list with basis set objects from a Basis Set in NWCHEM format
 
@@ -416,13 +417,16 @@ def block_creator(b, orb, blocks = blocks):
 
         basis = {"element" : element.capitalize(),
                  "version" : 1,
+                 "name" : "unknown",
                  "tags" : [],
-                 "aliases" : ["nwchem"],
+                 "aliases" : [""],
                  "blocks" : blocks }
 
-        if hasattr(fhandle, "name"):
-            basis["name"] = fhandle.name
-            basis["aliases"].append(fhandle.name.replace(".nwchem", ""))
+        if name is not None:
+            basis["name"] = name
+        elif hasattr(fhandle, "name"):
+            basis["name"] = Path(fhandle.name).name.replace(".nwchem", "")
+            basis["aliases"].append(basis["name"].split(".")[-1])
 
         return [cls(**basis)]
 
diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py
index 521848e..5a59087 100644
--- a/aiida_gaussian_datatypes/fetcher/cli.py
+++ b/aiida_gaussian_datatypes/fetcher/cli.py
@@ -2,13 +2,28 @@
 
 import click
 import tabulate
+from pathlib import Path
 from aiida.cmdline.utils import decorators, echo
 from aiida.cmdline.commands.cmd_data import verdi_data
+from aiida.orm import load_group
 from ..libraries import *
 from ..basisset.data import BasisSet
 from ..pseudopotential.data import Pseudopotential
-
-from ..utils import click_parse_range  # pylint: disable=relative-beyond-top-level
+#from ..groups import (
+#    BasisSetGroup,
+#    PseudopotenialGroup,
+#)
+from ..groups import BasisSetGroup
+from ..groups import PseudopotentialGroup
+
+#from ..utils import (
+#    click_parse_range,  # pylint: disable=relative-beyond-top-level
+#    SYM2NUM,
+#)
+
+from ..utils import click_parse_range
+from ..utils import SYM2NUM
+from aiida.common.exceptions import UniquenessError
 
 def _formatted_table_import(elements):
     """generates a formatted table (using tabulate) for importable basis and PPs"""
@@ -29,7 +44,7 @@ class row():
         t = []
 
         @_boldformater
-        def __new__(cls, num, element, t, p, b):
+        def __new__(cls, num, element, t, p, tags, b):
 
             if element in cls.element:
                 element = ""
@@ -51,27 +66,29 @@ def __new__(cls, num, element, t, p, b):
 
             if t == "":
                 p = ""
+                tags = []
 
             return (
                 num,
                 element,
                 t,
                 p,
+                " ".join(tags),
                 re.match("[A-z]{1,2}\.(.+).nwchem", b).group(1),
                 b
             )
 
     table_content = []
-    for ii, (e, d) in enumerate(elements.items()):
+    for ii, (e, d) in enumerate(elements):
         for t in d["types"]:
             if len(d["types"][t]["pseudos"]) == 0:
                 continue
             p = d["types"][t]["pseudos"][0]
             for b in d["types"][t]["basis"]:
-                table_content.append(row(ii, e, t, p.name, b.name))
+                table_content.append(row(ii, e, t, p.name, d["types"][t]["tags"], b.name))
 
     #table_content = [row(n, p, v) for n, (p, v) in enumerate(elements.items())]
-    return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "PseudoFile", "Basis", "BasisFile"])
+    return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "PseudoFile", "Tags", "Basis", "BasisFile"])
 
 @verdi_data.group("gaussian")
 def cli():
@@ -87,7 +104,27 @@ def install_family(library):
     """
     Installs a family of pseudo potentials from a remote repository
     """
+
+    basissetgname = f"{library}-basis"
+    try:
+        basisgroup = load_group(basissetgname)
+    except:
+        echo.echo_info("Creating library basis group ... ", nl = False)
+        basisgroup = BasisSetGroup(basissetgname)
+        basisgroup.store()
+        echo.echo("DONE")
+
+    pseudogname = f"{library}-pseudo"
+    try:
+        pseudogroup = load_group(pseudogname)
+    except:
+        echo.echo_info("Creating library pseudo group ... ", nl = False)
+        pseudogroup = PseudopotentialGroup(pseudogname)
+        pseudogroup.store()
+        echo.echo("DONE")
+
     elements = LibraryBookKeeper.get_library_by_name(library).fetch()
+    elements = [ [el, p] for el, p in sorted(elements.items(), key = lambda x: SYM2NUM[x[0]]) ]
     echo.echo_info(f"Found {len(elements)} elements")
     echo.echo(_formatted_table_import(elements))
     echo.echo("")
@@ -95,26 +132,44 @@ def install_family(library):
         "Which Elements do you want to add?"
         " ('n' for none, 'a' for all, comma-seperated list or range of numbers)",
         value_proc=lambda v: click_parse_range(v, len(elements)))
-    ic(elements)
-    ic(indexes)
     for idx in indexes:
-        e = elements[{1: "H"}[idx+1]]
-        for t, o in e["types"].items():
+        e, v = elements[idx]
+        for t, o in v["types"].items():
             for b in o["basis"]:
                 with open(str(b), "r") as fhandle:
-                    basis, = BasisSet.from_nwchem(fhandle)
-                    echo.echo(f"Adding Basis for: {basis.element} ({basis.name})...  ", nl=False)
-                    echo.echo("DONE")
+                    try:
+                        basis, = BasisSet.from_nwchem(fhandle,
+                                                      duplicate_handling = "new",
+                                                      name = f"{t}.{Path(fhandle.name).name}"
+                                                     )
+                        if basis is None:
+                            continue
+                        echo.echo_info(f"Adding Basis for: ", nl=False)
+                        echo.echo(f"{basis.element} ({basis.name})...  ", nl=False)
+                        basis.tags.extend(o["tags"])
+                        basis.store()
+                        basisgroup.add_nodes([basis])
+                        echo.echo("Imported")
+                    except UniquenessError:
+                        echo.echo("Skipping (already in)")
+                    except Exception as e:
+                        echo.echo("Skipping (something went wrong)")
             for p in o["pseudos"]:
                 with open(str(p), "r") as fhandle:
-                    pseudo, = Pseudopotential.from_gamess(fhandle)
-                    echo.echo(f"Adding Pseudo for: {pseudo.element} ({pseudo.name})... ", nl=False)
-                    echo.echo("DONE")
-
-
-    #    echo.echo_info(
-    #        "Adding Objects for: {p.element} ({p.name})... ".format(p=pseudos[idx]), nl=False)
-    #    pseudos[idx].store()
-    #    echo.echo("DONE")
-
+                    try:
+                        pseudo, = Pseudopotential.from_gamess(fhandle,
+                                                              duplicate_handling = "new"
+                                                             )
+                        if pseudo is None:
+                            continue
+                        echo.echo_info(f"Adding Basis for: ", nl=False)
+                        echo.echo(f"{pseudo.element} ({pseudo.name})...  ", nl=False)
+                        pseudo.tags.extend(o["tags"])
+                        pseudo.store()
+                        pseudogroup.add_nodes([pseudo])
+                        echo.echo("Imported")
+                    except UniquenessError:
+                        echo.echo("Skipping (already in)")
+                    except Exception as e:
+                        echo.echo("Skipping (something went wrong)")
 
diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
index 039ee43..d7aeaed 100644
--- a/aiida_gaussian_datatypes/libraries.py
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -82,7 +82,8 @@ def add_row(p, elements = elements):
             if typ not in elements[element]["types"]:
                 elements[element]["types"][typ] = {"path": typ_path,
                                                    "basis": [],
-                                                   "pseudos": []}
+                                                   "pseudos": [],
+                                                   "tags": ["ECP", typ, ]}
 
             elements[element]["types"][typ][nature].append(p)
 

From 843dbb614130d3600fdd27f0a020631f91d21732 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Tue, 28 Sep 2021 10:57:02 +0200
Subject: [PATCH 22/47] WIP

---
 aiida_gaussian_datatypes/fetcher/cli.py       | 11 +++--
 aiida_gaussian_datatypes/libraries.py         | 46 +++++++++++++++++--
 .../pseudopotential/cli.py                    |  6 ++-
 .../pseudopotential/data.py                   | 23 +++++++++-
 4 files changed, 74 insertions(+), 12 deletions(-)

diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py
index 5a59087..4d684df 100644
--- a/aiida_gaussian_datatypes/fetcher/cli.py
+++ b/aiida_gaussian_datatypes/fetcher/cli.py
@@ -85,7 +85,7 @@ def __new__(cls, num, element, t, p, tags, b):
                 continue
             p = d["types"][t]["pseudos"][0]
             for b in d["types"][t]["basis"]:
-                table_content.append(row(ii, e, t, p.name, d["types"][t]["tags"], b.name))
+                table_content.append(row(ii, e, t, p["path"].name, d["types"][t]["tags"], b["path"].name))
 
     #table_content = [row(n, p, v) for n, (p, v) in enumerate(elements.items())]
     return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "PseudoFile", "Tags", "Basis", "BasisFile"])
@@ -124,6 +124,7 @@ def install_family(library):
         echo.echo("DONE")
 
     elements = LibraryBookKeeper.get_library_by_name(library).fetch()
+
     elements = [ [el, p] for el, p in sorted(elements.items(), key = lambda x: SYM2NUM[x[0]]) ]
     echo.echo_info(f"Found {len(elements)} elements")
     echo.echo(_formatted_table_import(elements))
@@ -147,8 +148,8 @@ def install_family(library):
                         echo.echo_info(f"Adding Basis for: ", nl=False)
                         echo.echo(f"{basis.element} ({basis.name})...  ", nl=False)
                         basis.tags.extend(o["tags"])
-                        basis.store()
-                        basisgroup.add_nodes([basis])
+                        #basis.store()
+                        #basisgroup.add_nodes([basis])
                         echo.echo("Imported")
                     except UniquenessError:
                         echo.echo("Skipping (already in)")
@@ -165,8 +166,8 @@ def install_family(library):
                         echo.echo_info(f"Adding Basis for: ", nl=False)
                         echo.echo(f"{pseudo.element} ({pseudo.name})...  ", nl=False)
                         pseudo.tags.extend(o["tags"])
-                        pseudo.store()
-                        pseudogroup.add_nodes([pseudo])
+                        #pseudo.store()
+                        #pseudogroup.add_nodes([pseudo])
                         echo.echo("Imported")
                     except UniquenessError:
                         echo.echo("Skipping (already in)")
diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
index d7aeaed..912d10b 100644
--- a/aiida_gaussian_datatypes/libraries.py
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -4,7 +4,6 @@
 # Was there really a fish
 # That grants you that kind of wish
 #
-
 import os
 import re
 import git
@@ -13,6 +12,8 @@
 from aiida_gaussian_datatypes import utils
 from typing import Dict, Generic, List, Optional, Sequence, Type, TypeVar
 from icecream import ic
+from .basisset.data import BasisSet
+from .pseudopotential.data import Pseudopotential
 
 class LibraryBookKeeper:
 
@@ -84,8 +85,35 @@ def add_row(p, elements = elements):
                                                    "basis": [],
                                                    "pseudos": [],
                                                    "tags": ["ECP", typ, ]}
-
-            elements[element]["types"][typ][nature].append(p)
+            val = {}
+            val["path"] = p
+            with open(p, "r") as fhandle:
+                if nature == "basis":
+                    try:
+                        obj, = BasisSet.from_nwchem(fhandle,
+                                                   duplicate_handling = "new")
+                    except:
+                        """
+                        Something went wrong in the import, continuing ...
+                        """
+                        return
+                    tags = ["aug"]
+                elif nature == "pseudos":
+                    try:
+                        obj, = Pseudopotential.from_gamess(fhandle,
+                                                          duplicate_handling = "new")
+                    except:
+                        """
+                        Something went wrong in the import, continuing ...
+                        """
+                        return
+                    tags = []
+                else:
+                    raise # TODO give here an error
+            obj.tags.extend(tags)
+            val["obj"] = obj
+            val["tags"] = tags
+            elements[element]["types"][typ][nature].append(val)
 
 
         tempdir = pathlib.Path(tempfile.mkdtemp())
@@ -95,5 +123,17 @@ def add_row(p, elements = elements):
             if str(p.name).lower().endswith(".gamess") or str(p.name).lower().endswith(".nwchem"):
                 add_row(p)
 
+        """ Update valence electrons """
+        for e in elements:
+            for t in elements[e]["types"]:
+                if len(elements[e]["types"][t]["pseudos"]) == 1:
+                    tags = [f'q{elements[e]["types"][t]["pseudos"][0]["obj"].n_el_tot}',
+                            f'c{elements[e]["types"][t]["pseudos"][0]["obj"].core_electrons}'
+                           ]
+                    elements[e]["types"][t]["tags"].extend(tags)
+                    for ii, b in enumerate(elements[e]["types"][t]["basis"]):
+                        elements[e]["types"][t]["basis"][ii]["obj"].n_el = elements[e]["types"][t]["pseudos"][0]["obj"].n_el_tot
+
+
         return elements
 
diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py
index f538c3a..32eb64c 100644
--- a/aiida_gaussian_datatypes/pseudopotential/cli.py
+++ b/aiida_gaussian_datatypes/pseudopotential/cli.py
@@ -33,11 +33,12 @@ def row(num, pseudo):
             _names_column(pseudo.name, pseudo.aliases),
             ", ".join(pseudo.tags),
             ", ".join(f"{n:2d}" for n in pseudo.n_el + (3 - len(pseudo.n_el)) * [0]),
+            pseudo.n_el_tot,
             pseudo.version,
         )
 
     table_content = [row(n, p) for n, p in enumerate(pseudos)]
-    return tabulate.tabulate(table_content, headers=["Nr.", "Type", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Version"])
+    return tabulate.tabulate(table_content, headers=["Nr.", "Type", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Tot. val. e⁻", "Version"])
 
 
 def _formatted_table_list(pseudos):
@@ -51,11 +52,12 @@ def row(pseudo):
             _names_column(pseudo.name, pseudo.aliases),
             ", ".join(pseudo.tags),
             ", ".join(f"{n:2d}" for n in pseudo.n_el + (3 - len(pseudo.n_el)) * [0]),
+            pseudo.n_el_tot,
             pseudo.version,
         )
 
     table_content = [row(p) for p in pseudos]
-    return tabulate.tabulate(table_content, headers=["ID", "Type", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Version"])
+    return tabulate.tabulate(table_content, headers=["Nr.", "Type", "Sym", "Names", "Tags", "Val. e⁻ (s, p, ..)", "Tot. val. e⁻", "Version"])
 
 
 @verdi_data.group("gaussian.pseudo")
diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index c48bf6c..6b1c3ba 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -7,6 +7,7 @@
 """
 
 import dataclasses
+from ..utils import SYM2NUM
 from decimal import Decimal
 from icecream import ic
 
@@ -34,6 +35,7 @@ def __init__(
         aliases=None,
         tags=None,
         n_el=None,
+        n_el_tot=None,
         version=1,
         **kwargs,
     ):
@@ -53,13 +55,18 @@ def __init__(
 
         if not n_el:
             n_el = []
+        else:
+            if not n_el_tot:
+                n_el_tot = sum(n_el)
+            else:
+                raise #TODO an appropriate error here
 
         if "label" not in kwargs:
             kwargs["label"] = name
 
         super().__init__(**kwargs)
 
-        for attr in ("name", "element", "tags", "aliases", "n_el", "version"):
+        for attr in ("name", "element", "tags", "aliases", "n_el", "n_el_tot", "version"):
             self.set_attribute(attr, locals()[attr])
 
     def store(self, *args, **kwargs):
@@ -94,6 +101,8 @@ def _validate(self):
             )
             assert isinstance(self.tags, list) and all(isinstance(tag, str) for tag in self.tags)
             assert isinstance(self.version, int) and self.version > 0
+            if len(self.n_el) != 0:
+                assert(sum(self.n_el) == self.n_el_tot)
         except Exception as exc:
             raise ValidationError("One or more invalid fields found") from exc
 
@@ -151,6 +160,15 @@ def n_el(self):
 
         return self.get_attribute("n_el", [])
 
+    @property
+    def n_el_tot(self):
+        """
+        Return the total number of valence electrons
+        :rtype:int
+        """
+
+        return self.get_attribute("n_el_tot", [])
+
     @classmethod
     def get(cls, element, name=None, version="latest", match_aliases=True, group_label=None, n_el=None):
         """
@@ -362,7 +380,8 @@ def exists(pseudo):
                 "core_electrons" : core_electrons,
                 "lmax"           : lmax,
                 "version"        : 1,
-                "n_el"           : None}
+                "n_el"           : None,
+                "n_el_tot"       : SYM2NUM[element] - core_electrons}
 
         if duplicate_handling == "ignore":  # simply filter duplicates
             if exists(data):

From 61cf7eb91f5be86f6dc268aa8e87b64058151fad Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Tue, 28 Sep 2021 12:27:47 +0200
Subject: [PATCH 23/47] WIP

---
 aiida_gaussian_datatypes/basisset/data.py | 37 ++++++---
 aiida_gaussian_datatypes/fetcher/cli.py   | 61 ++++++---------
 aiida_gaussian_datatypes/libraries.py     | 93 +++++++++--------------
 3 files changed, 87 insertions(+), 104 deletions(-)

diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py
index aa59b8c..adcc9f9 100644
--- a/aiida_gaussian_datatypes/basisset/data.py
+++ b/aiida_gaussian_datatypes/basisset/data.py
@@ -329,7 +329,7 @@ def decimal2str(val):
         return [cls(**bs) for bs in bsets]
 
     @classmethod
-    def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", name = None):
+    def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", attrs = None):
         """
         Constructs a list with basis set objects from a Basis Set in NWCHEM format
 
@@ -348,6 +348,9 @@ def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", name =
         data = []
         blocks = []
 
+        if not attrs:
+            attrs = {}
+
         def block_creator(b, orb, blocks = blocks):
             orb_dict = {"s" : 0,
                         "p" : 1,
@@ -415,19 +418,31 @@ def block_creator(b, orb, blocks = blocks):
         else:
             raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
 
-        basis = {"element" : element.capitalize(),
-                 "version" : 1,
-                 "name" : "unknown",
-                 "tags" : [],
-                 "aliases" : [""],
-                 "blocks" : blocks }
-
-        if name is not None:
-            basis["name"] = name
-        elif hasattr(fhandle, "name"):
+        try:
+            basis = {"element" : element.capitalize(),
+                     "version" : 1,
+                     "name" : "unknown",
+                     "tags" : [],
+                     "aliases" : [],
+                     "blocks" : blocks }
+        except:
+            return []
+
+        if hasattr(fhandle, "name"):
             basis["name"] = Path(fhandle.name).name.replace(".nwchem", "")
             basis["aliases"].append(basis["name"].split(".")[-1])
 
+        if "name" in attrs:
+            basis["aliases"].append(basis["name"])
+            basis["name"] = attrs["name"]
+
+        for attr in ("n_el", "tags",):
+            if attr in attrs:
+                basis[attr] = attrs[attr]
+
+        if len(basis["aliases"]) == 0:
+            del basis["aliases"]
+
         return [cls(**basis)]
 
     def to_cp2k(self, fhandle):
diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py
index 4d684df..c427833 100644
--- a/aiida_gaussian_datatypes/fetcher/cli.py
+++ b/aiida_gaussian_datatypes/fetcher/cli.py
@@ -85,7 +85,9 @@ def __new__(cls, num, element, t, p, tags, b):
                 continue
             p = d["types"][t]["pseudos"][0]
             for b in d["types"][t]["basis"]:
-                table_content.append(row(ii, e, t, p["path"].name, d["types"][t]["tags"], b["path"].name))
+                table_content.append(row(ii, e, t, p["path"].name,
+                                         d["types"][t]["tags"],
+                                         b["path"].name))
 
     #table_content = [row(n, p, v) for n, (p, v) in enumerate(elements.items())]
     return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "PseudoFile", "Tags", "Basis", "BasisFile"])
@@ -137,40 +139,27 @@ def install_family(library):
         e, v = elements[idx]
         for t, o in v["types"].items():
             for b in o["basis"]:
-                with open(str(b), "r") as fhandle:
-                    try:
-                        basis, = BasisSet.from_nwchem(fhandle,
-                                                      duplicate_handling = "new",
-                                                      name = f"{t}.{Path(fhandle.name).name}"
-                                                     )
-                        if basis is None:
-                            continue
-                        echo.echo_info(f"Adding Basis for: ", nl=False)
-                        echo.echo(f"{basis.element} ({basis.name})...  ", nl=False)
-                        basis.tags.extend(o["tags"])
-                        #basis.store()
-                        #basisgroup.add_nodes([basis])
-                        echo.echo("Imported")
-                    except UniquenessError:
-                        echo.echo("Skipping (already in)")
-                    except Exception as e:
-                        echo.echo("Skipping (something went wrong)")
+                basis = b["obj"]
+                echo.echo_info(f"Adding Basis for: ", nl=False)
+                echo.echo(f"{basis.element} ({basis.name})...  ", nl=False)
+                try:
+                    basis.store()
+                    basisgroup.add_nodes([basis])
+                    echo.echo("Imported")
+                except UniquenessError:
+                    echo.echo("Skipping (already in)")
+                except Exception as e:
+                    echo.echo("Skipping (something went wrong)")
             for p in o["pseudos"]:
-                with open(str(p), "r") as fhandle:
-                    try:
-                        pseudo, = Pseudopotential.from_gamess(fhandle,
-                                                              duplicate_handling = "new"
-                                                             )
-                        if pseudo is None:
-                            continue
-                        echo.echo_info(f"Adding Basis for: ", nl=False)
-                        echo.echo(f"{pseudo.element} ({pseudo.name})...  ", nl=False)
-                        pseudo.tags.extend(o["tags"])
-                        #pseudo.store()
-                        #pseudogroup.add_nodes([pseudo])
-                        echo.echo("Imported")
-                    except UniquenessError:
-                        echo.echo("Skipping (already in)")
-                    except Exception as e:
-                        echo.echo("Skipping (something went wrong)")
+                pseudo = p["obj"]
+                echo.echo_info(f"Adding Basis for: ", nl=False)
+                echo.echo(f"{pseudo.element} ({pseudo.name})...  ", nl=False)
+                try:
+                    pseudo.store()
+                    pseudogroup.add_nodes([pseudo])
+                    echo.echo("Imported")
+                except UniquenessError:
+                    echo.echo("Skipping (already in)")
+                except Exception as e:
+                    echo.echo("Skipping (something went wrong)")
 
diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
index 912d10b..a8c6ac1 100644
--- a/aiida_gaussian_datatypes/libraries.py
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -57,7 +57,7 @@ class MitasLibrary(_ExternalLibrary):
     def fetch(cls):
 
         elements = {}
-        def add_row(p, elements = elements):
+        def add_data(p, elements = elements):
             element = str(p.parent.parent.name)
             if element not in utils.SYM2NUM: # Check if element is valid
                 return
@@ -66,15 +66,36 @@ def add_row(p, elements = elements):
             typ = str(p.parent.name)
             typ_path = str(p.parent.name)
 
-            if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.nwchem", p.name):
-                nature = "basis"
-            elif re.match("[A-z]{1,2}\.ccECP\.gamess", p.name):
-                nature = "pseudos"
-            else:
-                """
-                If does not match these regexes do nothing
-                """
-                return
+            tags = ["ECP", typ, ]
+
+            """ Load Pseudopotential first """
+            with open(p, "r") as fhandle:
+                pseudo, = Pseudopotential.from_gamess(fhandle,
+                                                      duplicate_handling = "new")
+            tags.append(f"q{pseudo.n_el_tot}")
+            tags.append(f"c{pseudo.core_electrons}")
+            pseudo.tags.extend(tags)
+
+            pseudos = [{"path": p,
+                        "obj": pseudo}]
+
+
+            """ Load Basis sets """
+            basis = []
+            for r in (p.parent).glob("**/*"):
+                if re.match("[A-z]{1,2}\.[A-z\-]*cc-.*\.nwchem", r.name):
+                    name = re.match("[A-z]{1,2}\.([A-z\-]*cc-.*)\.nwchem", r.name).group(1)
+                    name = f"{typ}-{name}"
+                    with open(r, "r") as fhandle:
+                        b = BasisSet.from_nwchem(fhandle,
+                                                 duplicate_handling = "new",
+                                                 attrs = {"n_el": pseudo.n_el_tot,
+                                                          "name": name,
+                                                          "tags": tags})
+                        if len(b) == 0: continue
+                        b, = b
+                    basis.append({"path": r,
+                                  "obj": b})
 
             if element not in elements:
                 elements[element] = {"path": element_path,
@@ -82,58 +103,16 @@ def add_row(p, elements = elements):
 
             if typ not in elements[element]["types"]:
                 elements[element]["types"][typ] = {"path": typ_path,
-                                                   "basis": [],
-                                                   "pseudos": [],
-                                                   "tags": ["ECP", typ, ]}
-            val = {}
-            val["path"] = p
-            with open(p, "r") as fhandle:
-                if nature == "basis":
-                    try:
-                        obj, = BasisSet.from_nwchem(fhandle,
-                                                   duplicate_handling = "new")
-                    except:
-                        """
-                        Something went wrong in the import, continuing ...
-                        """
-                        return
-                    tags = ["aug"]
-                elif nature == "pseudos":
-                    try:
-                        obj, = Pseudopotential.from_gamess(fhandle,
-                                                          duplicate_handling = "new")
-                    except:
-                        """
-                        Something went wrong in the import, continuing ...
-                        """
-                        return
-                    tags = []
-                else:
-                    raise # TODO give here an error
-            obj.tags.extend(tags)
-            val["obj"] = obj
-            val["tags"] = tags
-            elements[element]["types"][typ][nature].append(val)
-
+                                                   "basis": basis,
+                                                   "pseudos": pseudos,
+                                                   "tags": tags}
 
         tempdir = pathlib.Path(tempfile.mkdtemp())
         git.Repo.clone_from(cls._URL, tempdir)
 
         for p in (tempdir/"recipes").glob("**/*"):
-            if str(p.name).lower().endswith(".gamess") or str(p.name).lower().endswith(".nwchem"):
-                add_row(p)
-
-        """ Update valence electrons """
-        for e in elements:
-            for t in elements[e]["types"]:
-                if len(elements[e]["types"][t]["pseudos"]) == 1:
-                    tags = [f'q{elements[e]["types"][t]["pseudos"][0]["obj"].n_el_tot}',
-                            f'c{elements[e]["types"][t]["pseudos"][0]["obj"].core_electrons}'
-                           ]
-                    elements[e]["types"][t]["tags"].extend(tags)
-                    for ii, b in enumerate(elements[e]["types"][t]["basis"]):
-                        elements[e]["types"][t]["basis"][ii]["obj"].n_el = elements[e]["types"][t]["pseudos"][0]["obj"].n_el_tot
-
+            if re.match("[A-z]{1,2}\.ccECP\.gamess", p.name):
+                add_data(p)
 
         return elements
 

From b1c13f5d56b62e8f915b40096b7266e9b38a8038 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Wed, 29 Sep 2021 09:40:08 +0200
Subject: [PATCH 24/47] Add NWCHEM writer for basis

---
 aiida_gaussian_datatypes/basisset/cli.py  |  3 ++-
 aiida_gaussian_datatypes/basisset/data.py | 31 ++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/aiida_gaussian_datatypes/basisset/cli.py b/aiida_gaussian_datatypes/basisset/cli.py
index 7d1c91c..22fc0e5 100644
--- a/aiida_gaussian_datatypes/basisset/cli.py
+++ b/aiida_gaussian_datatypes/basisset/cli.py
@@ -175,7 +175,7 @@ def list_basisset(sym, name, tags):
               help="filter by name")
 @click.option('tags', '--tag', '-t', multiple=True,
               help="filter by a tag (all tags must be present if specified multiple times)")
-@click.option('output_format', '-f', '--format', type=click.Choice(['cp2k', ]), default='cp2k',
+@click.option('output_format', '-f', '--format', type=click.Choice(['cp2k', 'nwchem']), default='cp2k',
               help="Chose the output format for the basiset: " + ', '.join(['cp2k', ]))
 # fmt: on
 @decorators.with_dbenv()
@@ -190,6 +190,7 @@ def dump_basisset(sym, name, tags, output_format, data):
 
     writers = {
         "cp2k": BasisSet.to_cp2k,
+        "nwchem" : BasisSet.to_nwchem,
     }
 
     if data:
diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py
index adcc9f9..e903785 100644
--- a/aiida_gaussian_datatypes/basisset/data.py
+++ b/aiida_gaussian_datatypes/basisset/data.py
@@ -342,6 +342,8 @@ def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", attrs =
 
         """
         NWCHEM parser
+
+        TODO Maybe parser should move to "parsers"
         """
 
         element = None
@@ -360,7 +362,7 @@ def block_creator(b, orb, blocks = blocks):
                         "h" : 5,
                         "i" : 6 }
             block = { "n": 0, # I dont know how to setup main quantum number
-                      "l": [(orb_dict[orb], len(data))],
+                      "l": [(orb_dict[orb], 1)],
                       "coefficients" : [ [ d["exp"], d["cont"] ] for d in b ] }
             blocks.append(block)
 
@@ -458,6 +460,33 @@ def to_cp2k(self, fhandle):
             fhandle.write(line)
             fhandle.write("\n")
 
+    def to_nwchem(self, fhandle):
+        """
+        Write the Basis Set to the passed file handle in the format expected by NWCHEM.
+
+        :param fhandle: A valid output file handle
+        """
+        orb_dict = {0 : "s",
+                    1 : "p",
+                    2 : "d",
+                    3 : "f",
+                    4 : "g",
+                    5 : "h",
+                    6 : "i" }
+
+        fhandle.write(f"# from AiiDA BasisSet<uuid: {self.uuid}>\n")
+        for block in self.blocks:
+            offset = 0
+            for orb, num, in block["l"]:
+                fhandle.write(f"{self.element} {orb_dict[orb]}\n")
+                for lnum in range(num):
+                    for entry in block["coefficients"]:
+                        exponent = entry[0]
+                        coefficient = entry[1 + lnum + offset]
+                        fhandle.write(f"  {exponent:15.7f} {coefficient:15.7f}\n")
+                offset = num
+
+
     def get_matching_pseudopotential(self, *args, **kwargs):
         """
         Get a pseudopotential matching this basis set by at least element and number of valence electrons.

From c4d6c60984bcb6f1261d01f82c900c2808884629 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Wed, 29 Sep 2021 09:49:45 +0200
Subject: [PATCH 25/47] Add GAMESS format writer for basis

---
 aiida_gaussian_datatypes/basisset/cli.py  |  3 ++-
 aiida_gaussian_datatypes/basisset/data.py | 26 +++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/aiida_gaussian_datatypes/basisset/cli.py b/aiida_gaussian_datatypes/basisset/cli.py
index 22fc0e5..a299782 100644
--- a/aiida_gaussian_datatypes/basisset/cli.py
+++ b/aiida_gaussian_datatypes/basisset/cli.py
@@ -175,7 +175,7 @@ def list_basisset(sym, name, tags):
               help="filter by name")
 @click.option('tags', '--tag', '-t', multiple=True,
               help="filter by a tag (all tags must be present if specified multiple times)")
-@click.option('output_format', '-f', '--format', type=click.Choice(['cp2k', 'nwchem']), default='cp2k',
+@click.option('output_format', '-f', '--format', type=click.Choice(['cp2k', 'nwchem', 'gamess',]), default='cp2k',
               help="Chose the output format for the basiset: " + ', '.join(['cp2k', ]))
 # fmt: on
 @decorators.with_dbenv()
@@ -191,6 +191,7 @@ def dump_basisset(sym, name, tags, output_format, data):
     writers = {
         "cp2k": BasisSet.to_cp2k,
         "nwchem" : BasisSet.to_nwchem,
+        "gamess" : BasisSet.to_gamess,
     }
 
     if data:
diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py
index e903785..d175269 100644
--- a/aiida_gaussian_datatypes/basisset/data.py
+++ b/aiida_gaussian_datatypes/basisset/data.py
@@ -486,6 +486,32 @@ def to_nwchem(self, fhandle):
                         fhandle.write(f"  {exponent:15.7f} {coefficient:15.7f}\n")
                 offset = num
 
+    def to_gamess(self, fhandle):
+        """
+        Write the Basis Set to the passed file handle in the format expected by GAMESS.
+
+        :param fhandle: A valid output file handle
+        """
+        orb_dict = {0 : "s",
+                    1 : "p",
+                    2 : "d",
+                    3 : "f",
+                    4 : "g",
+                    5 : "h",
+                    6 : "i" }
+
+        fhandle.write(f"# from AiiDA BasisSet<uuid: {self.uuid}>\n")
+        for block in self.blocks:
+            offset = 0
+            for orb, num, in block["l"]:
+                fhandle.write(f" {orb_dict[orb].upper()}  {len(block['coefficients'])}\n")
+                for lnum in range(num):
+                    for ii, entry in enumerate(block["coefficients"]):
+                        exponent = entry[0]
+                        coefficient = entry[1 + lnum + offset]
+                        fhandle.write(f"  {ii + 1:3d} {exponent:15.7f} {coefficient:15.7f}\n")
+                offset = num
+
 
     def get_matching_pseudopotential(self, *args, **kwargs):
         """

From 0c367900156d7cfcb6796cc0cc4fe6e1a2b3110e Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Thu, 30 Sep 2021 14:51:22 +0200
Subject: [PATCH 26/47] Add non-unique version of BasisSet for basis set
 operations

---
 aiida_gaussian_datatypes/basisset/data.py | 54 +++++++++++++++--------
 setup.json                                |  4 ++
 2 files changed, 39 insertions(+), 19 deletions(-)

diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py
index d175269..b6d6e6a 100644
--- a/aiida_gaussian_datatypes/basisset/data.py
+++ b/aiida_gaussian_datatypes/basisset/data.py
@@ -22,7 +22,7 @@
 from icecream import ic
 
 
-class BasisSet(Data):
+class BasisSetCommon(Data):
     """
     Provide a general way to store GTO basis sets from different codes within the AiiDA framework.
     """
@@ -49,7 +49,7 @@ def __init__(self, element=None, name=None, aliases=None, tags=None, n_el=None,
         if "label" not in kwargs:
             kwargs["label"] = name
 
-        super(BasisSet, self).__init__(**kwargs)
+        super(BasisSetCommon, self).__init__(**kwargs)
 
         self.set_attribute("name", name)
         self.set_attribute("element", element)
@@ -60,25 +60,10 @@ def __init__(self, element=None, name=None, aliases=None, tags=None, n_el=None,
         self.set_attribute("version", version)
 
     def store(self, *args, **kwargs):
-        """
-        Store the node, ensuring that the combination (element,name,version) is unique.
-        """
-        # TODO: this uniqueness check is not race-condition free.
-
-        try:
-            existing = self.get(self.element, self.name, self.version, match_aliases=False)
-        except NotExistent:
-            pass
-        else:
-            raise UniquenessError(
-                f"Gaussian Basis Set already exists for"
-                f" element={self.element}, name={self.name}, version={self.version}: {existing.uuid}"
-            )
-
-        return super(BasisSet, self).store(*args, **kwargs)
+        return super(BasisSetCommon, self).store(*args, **kwargs)
 
     def _validate(self):
-        super(BasisSet, self)._validate()
+        super(BasisSetCommon, self)._validate()
 
         from cp2k_input_tools.basissets import BasisSetData
 
@@ -524,3 +509,34 @@ def get_matching_pseudopotential(self, *args, **kwargs):
             return Pseudopotential.get(element=self.element, n_el=self.n_el, *args, **kwargs)
         else:
             return Pseudopotential.get(element=self.element, *args, **kwargs)
+
+class BasisSet(BasisSetCommon):
+
+    def __init__(self, *args, **kwargs):
+        super(BasisSet, self).__init__(*args, **kwargs)
+
+    def store(self, *args, **kwargs):
+        """
+        Store the node, ensuring that the combination (element,name,version) is unique.
+        """
+        # TODO: this uniqueness check is not race-condition free.
+
+        try:
+            existing = self.get(self.element, self.name, self.version, match_aliases=False)
+        except NotExistent:
+            pass
+        else:
+            raise UniquenessError(
+                f"Gaussian Basis Set already exists for"
+                f" element={self.element}, name={self.name}, version={self.version}: {existing.uuid}"
+            )
+
+        return super(BasisSet, self).store(*args, **kwargs)
+
+class BasisSetFree(BasisSetCommon):
+
+    def __init__(self, *args, **kwargs):
+        super(BasisSetFree, self).__init__(*args, **kwargs)
+
+    def store(self, *args, **kwargs):
+        return super(BasisSetFree, self).store(*args, **kwargs)
diff --git a/setup.json b/setup.json
index 6a20664..b4d404b 100644
--- a/setup.json
+++ b/setup.json
@@ -22,6 +22,7 @@
     "entry_points": {
         "aiida.data": [
             "gaussian.basisset = aiida_gaussian_datatypes.basisset.data:BasisSet",
+            "gaussian.basissetfree = aiida_gaussian_datatypes.basisset.data:BasisSetFree",
             "gaussian.pseudo = aiida_gaussian_datatypes.pseudopotential.data:Pseudopotential",
             "gaussian.pseudo.gthpseudopotential = aiida_gaussian_datatypes.pseudopotential.data:GTHPseudopotential",
             "gaussian.pseudo.ecppseudopotential = aiida_gaussian_datatypes.pseudopotential.data:ECPPseudopotential"
@@ -34,6 +35,9 @@
         "aiida.groups": [
             "gaussian.basisset = aiida_gaussian_datatypes.groups:BasisSetGroup",
             "gaussian.pseudo = aiida_gaussian_datatypes.groups:PseudopotentialGroup"
+        ],
+        "aiida.calculations": [
+            "gaussian.uncontract = aiida_gaussian_datatypes.calc.uncontract:uncontract"
         ]
     },
     "scripts": [],

From 99b9b88e84418b1d45b5ab2d9bc6c1a22f7a7413 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Thu, 30 Sep 2021 14:51:56 +0200
Subject: [PATCH 27/47] Change the order of stored functions. In GAMESS format
 the order is unintuitive: the first block is the uppermost angular momentum,
 followed by s, p, d ...

---
 aiida_gaussian_datatypes/pseudopotential/data.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index 6b1c3ba..3dffb0f 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -365,6 +365,14 @@ def exists(pseudo):
                 """
                 functions[-1]["polynoms"] = [ int(x) for x in functions[-1]["polynoms"] ]
 
+        """
+        Change the order of functions so they match orbital momentum
+
+        In GAMESS format first block represents upper most lmax
+        and then the rest s, p, d, ...
+        """
+        functions = functions[1:] + [functions[0]]
+
         """
         TODO properly extract name
         """
@@ -433,7 +441,8 @@ def to_gamess(self, fhandle):
 
         if isinstance(self, ECPPseudopotential):
             fhandle.write(f"{self.name} GEN {self.core_electrons} {self.lmax}\n")
-            for fun in self.functions:
+            functions = [self.functions[-1]] + self.functions[:-1]
+            for fun in functions:
                 fhandle.write(f"{len(fun)}\n")
                 for prefactor, polynom, exponent in zip(*[ fun[k] for k in ("prefactors", "polynoms", "exponents")]):
                     fhandle.write(f"{prefactor:10.7f} {polynom:d} {exponent:10.7f}\n")

From c722b7dfa2d8ced11a6cab1dd86e16e99b382787 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Thu, 30 Sep 2021 15:33:27 +0200
Subject: [PATCH 28/47] Add writer for turborvb format, fix an error in gamess
 format writer

---
 .../pseudopotential/cli.py                    |  4 ++-
 .../pseudopotential/data.py                   | 27 +++++++++++++++++--
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py
index 32eb64c..3bfb491 100644
--- a/aiida_gaussian_datatypes/pseudopotential/cli.py
+++ b/aiida_gaussian_datatypes/pseudopotential/cli.py
@@ -184,7 +184,8 @@ def list_pseudo(sym, name, tags):
 @click.option('tags', '--tag', '-t', multiple=True,
               help="filter by a tag (all tags must be present if specified multiple times)")
 @click.option('output_format', '-f', '--format', type=click.Choice(['cp2k',
-                                                                    'gamess']), default='cp2k',
+                                                                    'gamess',
+                                                                    'turborvb']), default='cp2k',
               help="Chose the output format for the pseudopotentials: " + ', '.join(['cp2k', ]))
 @decorators.with_dbenv()
 # fmt: on
@@ -200,6 +201,7 @@ def dump_pseudo(sym, name, tags, output_format, data):
     writers = {
         "cp2k": Pseudopotential.to_cp2k,
         "gamess": Pseudopotential.to_gamess,
+        "turborvb": Pseudopotential.to_turborvb,
     }
 
     if data:
diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index 3dffb0f..b12f3d8 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -443,9 +443,32 @@ def to_gamess(self, fhandle):
             fhandle.write(f"{self.name} GEN {self.core_electrons} {self.lmax}\n")
             functions = [self.functions[-1]] + self.functions[:-1]
             for fun in functions:
-                fhandle.write(f"{len(fun)}\n")
+                fhandle.write(f"{len(fun['polynoms'])}\n")
                 for prefactor, polynom, exponent in zip(*[ fun[k] for k in ("prefactors", "polynoms", "exponents")]):
-                    fhandle.write(f"{prefactor:10.7f} {polynom:d} {exponent:10.7f}\n")
+                    fhandle.write(f"{prefactor:12.7f} {polynom:4d} {exponent:12.7f}\n")
+
+
+        else:
+            """
+            make an error
+            """
+            pass
+
+    def to_turborvb(self, fhandle):
+        """
+        Write this Pseudopotential instance to a file in TurboRVB format.
+
+        :param fhandle: open file handle
+        """
+
+        if isinstance(self, ECPPseudopotential):
+            fhandle.write(f"GEN\n")
+            fhandle.write(f"1 0 {self.lmax}\n")
+            fhandle.write(" ".join([ f"{len(x['polynoms'])}" for x in self.functions ]))
+            fhandle.write("\n")
+            for fun in self.functions:
+                for prefactor, polynom, exponent in zip(*[ fun[k] for k in ("prefactors", "polynoms", "exponents")]):
+                    fhandle.write(f"{prefactor:12.7f} {polynom:4d} {exponent:12.7f}\n")
 
 
         else:

From cdfd800415b7b853d24b0371bce77486e1b30fed Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 1 Oct 2021 08:20:05 +0200
Subject: [PATCH 29/47] Add job for uncontraction

---
 aiida_gaussian_datatypes/calc/__init__py    |  0
 aiida_gaussian_datatypes/calc/uncontract.py | 33 +++++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 aiida_gaussian_datatypes/calc/__init__py
 create mode 100644 aiida_gaussian_datatypes/calc/uncontract.py

diff --git a/aiida_gaussian_datatypes/calc/__init__py b/aiida_gaussian_datatypes/calc/__init__py
new file mode 100644
index 0000000..e69de29
diff --git a/aiida_gaussian_datatypes/calc/uncontract.py b/aiida_gaussian_datatypes/calc/uncontract.py
new file mode 100644
index 0000000..cdc7124
--- /dev/null
+++ b/aiida_gaussian_datatypes/calc/uncontract.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+
+from aiida.plugins import DataFactory
+from aiida.engine import calcfunction
+from icecream import ic
+"""
+
+"""
+
+BasisSet = DataFactory("gaussian.basisset")
+BasisSetFree = DataFactory("gaussian.basissetfree")
+
+@calcfunction
+def uncontract(basisset):
+    """
+
+    """
+    def disassemble(block):
+        n = block["n"]
+        l = block["l"]
+        for exp, cont in block["coefficients"]:
+            yield {"n" : n,
+                   "l" : l,
+                   "coefficients": [[exp, 1.0]]}
+    attr = basisset.attributes
+    blocks = []
+    for block in attr["blocks"]:
+        blocks.extend([ b for b in disassemble(block) ])
+    attr["blocks"] = blocks
+    attr["name"] += "-uncont"
+    ret = BasisSetFree(**attr)
+    return ret
+

From 4957c50d762b8bce762f144893940193fa92946e Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 1 Oct 2021 09:23:54 +0200
Subject: [PATCH 30/47] Add uniqueness test to basis sets imported via
 from_nwchem

---
 aiida_gaussian_datatypes/basisset/data.py | 57 +++++++++++------------
 1 file changed, 27 insertions(+), 30 deletions(-)

diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py
index b6d6e6a..c348ecb 100644
--- a/aiida_gaussian_datatypes/basisset/data.py
+++ b/aiida_gaussian_datatypes/basisset/data.py
@@ -324,6 +324,13 @@ def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", attrs =
         :rtype: list
         """
 
+        def exists(bset):
+            try:
+                cls.get(bset["element"], bset["name"], match_aliases=False)
+            except NotExistent:
+                return False
+
+            return True
 
         """
         NWCHEM parser
@@ -375,36 +382,6 @@ def block_creator(b, orb, blocks = blocks):
             block_creator(data, orb)
             data = []
 
-        if duplicate_handling == "ignore":  # simply filter duplicates
-            #bsets = [bs for bs in bsets if not exists(bs)]
-            pass
-
-        elif duplicate_handling == "error":
-            #for bset in bsets:
-            #    try:
-            #        latest = cls.get(bset["element"], bset["name"], match_aliases=False)
-            #    except NotExistent:
-            #        pass
-            #    else:
-            #        raise UniquenessError(
-            #            f"Gaussian Basis Set already exists for"
-            #            f" element={bset['element']}, name={bset['name']}: {latest.uuid}"
-            #        )
-            pass
-
-        elif duplicate_handling == "new":
-            #for bset in bsets:
-            #    try:
-            #        latest = cls.get(bset["element"], bset["name"], match_aliases=False)
-            #    except NotExistent:
-            #        pass
-            #    else:
-            #        bset["version"] = latest.version + 1
-            pass
-
-        else:
-            raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
-
         try:
             basis = {"element" : element.capitalize(),
                      "version" : 1,
@@ -430,6 +407,26 @@ def block_creator(b, orb, blocks = blocks):
         if len(basis["aliases"]) == 0:
             del basis["aliases"]
 
+        if duplicate_handling == "ignore":  # simply filter duplicates
+            if exists(basis):
+                return []
+
+        elif duplicate_handling == "error":
+            if exists(basis):
+                raise UniquenessError( f"Gaussian Basis Set already exists for"
+                                       f" element={basis['element']}, name={basis['name']}: {latest.uuid}")
+
+        elif duplicate_handling == "new":
+                try:
+                    latest = cls.get(basis["element"], basis["name"], match_aliases=False)
+                except NotExistent:
+                    pass
+                else:
+                    basis["version"] = latest.version + 1
+
+        else:
+            raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
+
         return [cls(**basis)]
 
     def to_cp2k(self, fhandle):

From b9fd2a698096314bff915c3cba736f9eab415bcc Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 1 Oct 2021 09:29:01 +0200
Subject: [PATCH 31/47] Rename MitasLibrary to QmcpackLibrary

---
 aiida_gaussian_datatypes/libraries.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
index a8c6ac1..8829b23 100644
--- a/aiida_gaussian_datatypes/libraries.py
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -49,7 +49,7 @@ class EmptyLibrary(_ExternalLibrary):
     pass
 
 @LibraryBookKeeper.register_library
-class MitasLibrary(_ExternalLibrary):
+class QmcpackLibrary(_ExternalLibrary):
 
     _URL = "https://github.com/QMCPACK/pseudopotentiallibrary.git"
 

From 7283a7319a643ccfe7785e76ea85616b930433c1 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 1 Oct 2021 09:48:20 +0200
Subject: [PATCH 32/47] Add force-ignore option

---
 aiida_gaussian_datatypes/basisset/data.py        | 5 ++++-
 aiida_gaussian_datatypes/libraries.py            | 5 ++---
 aiida_gaussian_datatypes/pseudopotential/data.py | 5 ++++-
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py
index c348ecb..51ab2c9 100644
--- a/aiida_gaussian_datatypes/basisset/data.py
+++ b/aiida_gaussian_datatypes/basisset/data.py
@@ -407,7 +407,10 @@ def block_creator(b, orb, blocks = blocks):
         if len(basis["aliases"]) == 0:
             del basis["aliases"]
 
-        if duplicate_handling == "ignore":  # simply filter duplicates
+        if duplicate_handling == "force-ignore":  # It will check at the store stage
+            pass
+
+        elif duplicate_handling == "ignore":  # simply filter duplicates
             if exists(basis):
                 return []
 
diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
index 8829b23..0d4c3c1 100644
--- a/aiida_gaussian_datatypes/libraries.py
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -71,7 +71,7 @@ def add_data(p, elements = elements):
             """ Load Pseudopotential first """
             with open(p, "r") as fhandle:
                 pseudo, = Pseudopotential.from_gamess(fhandle,
-                                                      duplicate_handling = "new")
+                                                      duplicate_handling = "force-ignore")
             tags.append(f"q{pseudo.n_el_tot}")
             tags.append(f"c{pseudo.core_electrons}")
             pseudo.tags.extend(tags)
@@ -79,7 +79,6 @@ def add_data(p, elements = elements):
             pseudos = [{"path": p,
                         "obj": pseudo}]
 
-
             """ Load Basis sets """
             basis = []
             for r in (p.parent).glob("**/*"):
@@ -88,7 +87,7 @@ def add_data(p, elements = elements):
                     name = f"{typ}-{name}"
                     with open(r, "r") as fhandle:
                         b = BasisSet.from_nwchem(fhandle,
-                                                 duplicate_handling = "new",
+                                                 duplicate_handling = "force-ignore",
                                                  attrs = {"n_el": pseudo.n_el_tot,
                                                           "name": name,
                                                           "tags": tags})
diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index b12f3d8..17363e8 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -391,7 +391,10 @@ def exists(pseudo):
                 "n_el"           : None,
                 "n_el_tot"       : SYM2NUM[element] - core_electrons}
 
-        if duplicate_handling == "ignore":  # simply filter duplicates
+        if duplicate_handling == "force-ignore":  # This will be checked at the store stage
+            pass
+
+        elif duplicate_handling == "ignore":  # simply filter duplicates
             if exists(data):
                 return []
 

From 79c42c4a568800a16bb43692d3c7aab6dd2af37f Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 1 Oct 2021 10:42:38 +0200
Subject: [PATCH 33/47] Update setup.json

---
 setup.json | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/setup.json b/setup.json
index b4d404b..33d94b6 100644
--- a/setup.json
+++ b/setup.json
@@ -43,6 +43,9 @@
     "scripts": [],
     "reentry_register": true,
     "install_requires": [
+	"gitpython >= 3.1.24",    
+	"icecream >= 2.1.1",
+	"pydriller >= 2.0",
         "pydantic >= 1.8.1",
         "aiida-core >= 1.6.2",
         "cp2k-input-tools >= 0.7.3"

From 9d9e05a4f5813e8a25349990021c6ab379053236 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 1 Oct 2021 12:04:34 +0200
Subject: [PATCH 34/47] Fix naming convention for pseudo

---
 aiida_gaussian_datatypes/fetcher/cli.py          |  1 +
 aiida_gaussian_datatypes/libraries.py            |  3 ++-
 aiida_gaussian_datatypes/pseudopotential/data.py | 10 +++++++++-
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py
index c427833..a5e4c99 100644
--- a/aiida_gaussian_datatypes/fetcher/cli.py
+++ b/aiida_gaussian_datatypes/fetcher/cli.py
@@ -2,6 +2,7 @@
 
 import click
 import tabulate
+import pydriller
 from pathlib import Path
 from aiida.cmdline.utils import decorators, echo
 from aiida.cmdline.commands.cmd_data import verdi_data
diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
index 0d4c3c1..b15d340 100644
--- a/aiida_gaussian_datatypes/libraries.py
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -71,7 +71,8 @@ def add_data(p, elements = elements):
             """ Load Pseudopotential first """
             with open(p, "r") as fhandle:
                 pseudo, = Pseudopotential.from_gamess(fhandle,
-                                                      duplicate_handling = "force-ignore")
+                                                      duplicate_handling = "force-ignore",
+                                                      attrs = {"name" : typ })
             tags.append(f"q{pseudo.n_el_tot}")
             tags.append(f"c{pseudo.core_electrons}")
             pseudo.tags.extend(tags)
diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index 17363e8..92085d1 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -320,7 +320,7 @@ def decimal2str(val):
         return [GTHPseudopotential(**p) for p in pseudos]
 
     @classmethod
-    def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False):
+    def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False, attrs = None):
         """
         Constructs a list with pseudopotential objects from a Pseudopotential in GAMESS format
 
@@ -339,6 +339,9 @@ def exists(pseudo):
 
             return True
 
+        if not attrs:
+            attrs = {}
+
         """
         Parser for Gamess format
         """
@@ -376,6 +379,7 @@ def exists(pseudo):
         """
         TODO properly extract name
         """
+
         element = name.split("-")[0]
         lmax = int(lmax)
         core_electrons = int(core_electrons)
@@ -391,6 +395,10 @@ def exists(pseudo):
                 "n_el"           : None,
                 "n_el_tot"       : SYM2NUM[element] - core_electrons}
 
+        if "name" in attrs:
+            data["aliases"].append(data["name"])
+            data["name"] = attrs["name"]
+
         if duplicate_handling == "force-ignore":  # This will be checked at the store stage
             pass
 

From 170aa3fbfa8cd4e3d7b04f61558550d816397f3f Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 1 Oct 2021 12:13:03 +0200
Subject: [PATCH 35/47] Fix small typo

---
 aiida_gaussian_datatypes/fetcher/cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py
index a5e4c99..ad43b08 100644
--- a/aiida_gaussian_datatypes/fetcher/cli.py
+++ b/aiida_gaussian_datatypes/fetcher/cli.py
@@ -153,7 +153,7 @@ def install_family(library):
                     echo.echo("Skipping (something went wrong)")
             for p in o["pseudos"]:
                 pseudo = p["obj"]
-                echo.echo_info(f"Adding Basis for: ", nl=False)
+                echo.echo_info(f"Adding Pseudopotential for: ", nl=False)
                 echo.echo(f"{pseudo.element} ({pseudo.name})...  ", nl=False)
                 try:
                     pseudo.store()

From 7c78e9e9da651c7631491fd3e14635292e549450 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 1 Oct 2021 13:26:41 +0200
Subject: [PATCH 36/47] Add version controlling based on commit hashes

---
 aiida_gaussian_datatypes/libraries.py | 44 +++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 3 deletions(-)

diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
index b15d340..64b4fd3 100644
--- a/aiida_gaussian_datatypes/libraries.py
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -9,11 +9,16 @@
 import git
 import tempfile
 import pathlib
+import pydriller
 from aiida_gaussian_datatypes import utils
 from typing import Dict, Generic, List, Optional, Sequence, Type, TypeVar
 from icecream import ic
 from .basisset.data import BasisSet
-from .pseudopotential.data import Pseudopotential
+from .pseudopotential.data import Pseudopotential, ECPPseudopotential
+
+from aiida.common.exceptions import (
+    NotExistent,
+)
 
 class LibraryBookKeeper:
 
@@ -57,7 +62,7 @@ class QmcpackLibrary(_ExternalLibrary):
     def fetch(cls):
 
         elements = {}
-        def add_data(p, elements = elements):
+        def add_data(p, tempdir, elements = elements):
             element = str(p.parent.parent.name)
             if element not in utils.SYM2NUM: # Check if element is valid
                 return
@@ -73,6 +78,22 @@ def add_data(p, elements = elements):
                 pseudo, = Pseudopotential.from_gamess(fhandle,
                                                       duplicate_handling = "force-ignore",
                                                       attrs = {"name" : typ })
+
+            commithash = ""
+            for commit in pydriller.Repository(str(tempdir), filepath=str(p)).traverse_commits():
+                commithash = commit.hash
+            if commithash == "": return
+            pseudo.extras["commithash"] = commithash
+
+            try:
+                latest = ECPPseudopotential.get(pseudo.element,
+                                                pseudo.name)
+                pseudo.version = latest.version
+                if latest.extras["commithash"] != commithash:
+                    pseudo.version += 1
+            except NotExistent:
+                pass
+
             tags.append(f"q{pseudo.n_el_tot}")
             tags.append(f"c{pseudo.core_electrons}")
             pseudo.tags.extend(tags)
@@ -94,9 +115,26 @@ def add_data(p, elements = elements):
                                                           "tags": tags})
                         if len(b) == 0: continue
                         b, = b
+
+                        commithash = ""
+                        for commit in pydriller.Repository(str(tempdir), filepath=str(r)).traverse_commits():
+                            commithash = commit.hash
+                        if commithash == "": return
+                        b.extras["commithash"] = commithash
+
+                        try:
+                            latest = BasisSet.get(b.element,
+                                                  b.name)
+                            b.version = latest.version
+                            if latest.extras["commithash"] != commithash:
+                                b.version += 1
+                        except NotExistent:
+                            pass
+
                     basis.append({"path": r,
                                   "obj": b})
 
+
             if element not in elements:
                 elements[element] = {"path": element_path,
                                      "types": {}}
@@ -112,7 +150,7 @@ def add_data(p, elements = elements):
 
         for p in (tempdir/"recipes").glob("**/*"):
             if re.match("[A-z]{1,2}\.ccECP\.gamess", p.name):
-                add_data(p)
+                add_data(p, tempdir)
 
         return elements
 

From 2f69d247958b12ef4c6c1984d918ca37492e98b7 Mon Sep 17 00:00:00 2001
From: addman <pravod@gmail.com>
Date: Fri, 1 Oct 2021 13:46:35 +0200
Subject: [PATCH 37/47] Changed version controlling. Now the version number is
 the serial number of the commit

---
 aiida_gaussian_datatypes/libraries.py | 25 ++++---------------------
 1 file changed, 4 insertions(+), 21 deletions(-)

diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
index 64b4fd3..6556270 100644
--- a/aiida_gaussian_datatypes/libraries.py
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -78,21 +78,12 @@ def add_data(p, tempdir, elements = elements):
                 pseudo, = Pseudopotential.from_gamess(fhandle,
                                                       duplicate_handling = "force-ignore",
                                                       attrs = {"name" : typ })
-
             commithash = ""
-            for commit in pydriller.Repository(str(tempdir), filepath=str(p)).traverse_commits():
+            for version, commit in enumerate(pydriller.Repository(str(tempdir), filepath=str(p)).traverse_commits()):
                 commithash = commit.hash
             if commithash == "": return
             pseudo.extras["commithash"] = commithash
-
-            try:
-                latest = ECPPseudopotential.get(pseudo.element,
-                                                pseudo.name)
-                pseudo.version = latest.version
-                if latest.extras["commithash"] != commithash:
-                    pseudo.version += 1
-            except NotExistent:
-                pass
+            pseudo.attributes["version"] = version + 1
 
             tags.append(f"q{pseudo.n_el_tot}")
             tags.append(f"c{pseudo.core_electrons}")
@@ -117,19 +108,11 @@ def add_data(p, tempdir, elements = elements):
                         b, = b
 
                         commithash = ""
-                        for commit in pydriller.Repository(str(tempdir), filepath=str(r)).traverse_commits():
+                        for version, commit in enumerate(pydriller.Repository(str(tempdir), filepath=str(r)).traverse_commits()):
                             commithash = commit.hash
                         if commithash == "": return
                         b.extras["commithash"] = commithash
-
-                        try:
-                            latest = BasisSet.get(b.element,
-                                                  b.name)
-                            b.version = latest.version
-                            if latest.extras["commithash"] != commithash:
-                                b.version += 1
-                        except NotExistent:
-                            pass
+                        b.attributes["version"] = version + 1
 
                     basis.append({"path": r,
                                   "obj": b})

From 2e05946f0166348d0350cbd73eb3493e66a37fea Mon Sep 17 00:00:00 2001
From: addman <addman@debian-BULLSEYE-live-builder-AMD64>
Date: Mon, 13 Dec 2021 21:14:40 +0100
Subject: [PATCH 38/47] Fix bad PP generation for TurboRVB, and others

---
 aiida_gaussian_datatypes/basisset/data.py     |  28 ++++-
 .../pseudopotential/cli.py                    |   3 +-
 .../pseudopotential/data.py                   | 113 +++++++++++++++++-
 3 files changed, 140 insertions(+), 4 deletions(-)

diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py
index 51ab2c9..27659bb 100644
--- a/aiida_gaussian_datatypes/basisset/data.py
+++ b/aiida_gaussian_datatypes/basisset/data.py
@@ -69,7 +69,7 @@ def _validate(self):
 
         try:
             # directly raises an exception for the data if something's amiss, extra fields are ignored
-            BasisSetData.from_dict({"identifiers": self.aliases, **self.attributes})
+            # BasisSetData.from_dict({"identifiers": self.aliases, **self.attributes})
 
             #assert isinstance(self.name, str) and self.name
             assert (
@@ -497,6 +497,32 @@ def to_gamess(self, fhandle):
                         fhandle.write(f"  {ii + 1:3d} {exponent:15.7f} {coefficient:15.7f}\n")
                 offset = num
 
+    def to_gaussian(self, fhandle):
+        """
+        Write the Basis Set to the passed file handle in the format expected by Gaussian.
+
+        One header line (shell letter, number of primitives) is written per angular
+        momentum entry of a block, followed by numbered exponent/coefficient rows
+        for each of its contractions.
+
+        :param fhandle: A valid output file handle
+        """
+        # angular momentum quantum number -> shell letter
+        orb_dict = {0: "s", 1: "p", 2: "d", 3: "f", 4: "g", 5: "h", 6: "i"}
+
+        fhandle.write(f"# from AiiDA BasisSet<uuid: {self.uuid}>\n")
+        for block in self.blocks:
+            # number of coefficient columns already consumed by previous l entries
+            offset = 0
+            for orb, num, in block["l"]:
+                fhandle.write(f" {orb_dict[orb].upper()}  {len(block['coefficients'])}\n")
+                for lnum in range(num):
+                    for ii, entry in enumerate(block["coefficients"]):
+                        exponent = entry[0]  # column 0 holds the exponent
+                        coefficient = entry[1 + lnum + offset]
+                        fhandle.write(f"  {ii + 1:3d} {exponent:15.7f} {coefficient:15.7f}\n")
+                offset += num  # accumulate, otherwise the 3rd and later entries reuse columns
+
 
     def get_matching_pseudopotential(self, *args, **kwargs):
         """
diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py
index 3bfb491..b0166d2 100644
--- a/aiida_gaussian_datatypes/pseudopotential/cli.py
+++ b/aiida_gaussian_datatypes/pseudopotential/cli.py
@@ -75,7 +75,7 @@ def cli():
     help="filter by a tag (all tags must be present if specified multiple times)")
 @click.option(
     'fformat', '-f', '--format',
-    type=click.Choice(['cp2k', 'gamess' ]), default='cp2k',
+    type=click.Choice(['cp2k', 'gamess', 'turborvb' ]), default='cp2k',
     help="the format of the pseudopotential file")
 @click.option(
     '--duplicates',
@@ -99,6 +99,7 @@ def import_pseudo(pseudopotential_file, fformat, sym, tags, duplicates, ignore_i
     loaders = {
         "cp2k": Pseudopotential.from_cp2k,
         "gamess": Pseudopotential.from_gamess,
+        "turborvb": Pseudopotential.from_turborvb,
     }
 
     filters = {
diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index 92085d1..0cc110b 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -422,6 +422,112 @@ def exists(pseudo):
             raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
 
         return [ECPPseudopotential(**data)]
+    @classmethod
+    def from_turborvb(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False, attrs = None, name = None):
+        """
+        Constructs a list with pseudopotential objects from a Pseudopotential in TurboRVB format
+
+        The element is derived from the file name of fhandle (``Z<n>_atomnumber<Z>.<ext>``).
+
+        :param fhandle: open file handle
+        :param filters: a dict with attribute filter functions
+        :param duplicate_handling: how to handle duplicates ("ignore", "error", "new" (version))
+        :param ignore_invalid: whether to ignore invalid entries silently
+        :rtype: list
+        :raises ValueError: if the element cannot be determined or duplicate_handling is unknown
+        """
+
+        element = None
+        if hasattr(fhandle, "name"):
+            import os
+            import re
+            fname = os.path.basename(str(fhandle.name))  # tolerate a leading directory
+            ret = re.match(r"Z[0-9]{1,2}_atomnumber([0-9]{1,2})\.[A-z]+", fname)
+            if ret:
+                atnum = int(ret.group(1))
+                element = list(SYM2NUM.keys())[list(SYM2NUM.values()).index(atnum)]
+                name = fname
+
+        def exists(pseudo):
+            try:
+                cls.get(pseudo["element"], pseudo["name"], match_aliases=False)
+            except NotExistent:
+                return False
+
+            return True
+
+        if not attrs:
+            attrs = {}
+
+        # Parser for TurboRVB format
+
+        functions = []
+        for ii, line in enumerate(fhandle):
+            if ii == 0: continue  # first line is skipped (to_turborvb writes "ECP" there)
+            if ii == 1:
+                num, r0, lmax = [float(x) for x in line.split()]
+                continue
+            if ii == 2:
+                numf = [float(x) for x in line.split()]  # remaining number of terms per function
+                for jj in range(len(numf)):
+                    functions.append({"prefactors" : [],
+                                      "polynoms"   : [],
+                                      "exponents"  : []})
+                continue
+            for jj in range(len(numf)):  # the line goes to the first function still expecting terms
+                if numf[jj] < 1: continue
+                numf[jj] -= 1
+                for key, value in zip(("prefactors", "polynoms", "exponents"), map(float, line.split())):
+                    functions[jj][key].append(value)
+
+                functions[jj]["polynoms"] = [ int(x) for x in functions[jj]["polynoms"] ]
+                break
+
+        # TODO properly extract name
+        if element is None:
+            raise ValueError("Cannot determine the element: file name must look like 'Z<n>_atomnumber<Z>.<ext>'")
+
+        lmax = int(lmax)
+
+        data = {"functions"      : functions,
+                "element"        : element,
+                "aliases"        : [name],
+                "name"           : name,
+                "core_electrons" : 0,
+                "lmax"           : lmax,
+                "version"        : 1,
+                "n_el"           : None,
+                "n_el_tot"       : 0}
+
+        if "name" in attrs:
+            data["aliases"].append(data["name"])
+            data["name"] = attrs["name"]
+
+        if duplicate_handling == "force-ignore":  # This will be checked at the store stage
+            pass
+
+        elif duplicate_handling == "ignore":  # simply filter duplicates
+            if exists(data):
+                return []
+
+        elif duplicate_handling == "error":
+            if exists(data):
+                latest = cls.get(data["element"], data["name"], match_aliases=False)  # needed for the uuid below
+                raise UniquenessError(
+                    f"Gaussian Pseudopotential already exists for element={data['element']}, name={data['name']}: {latest.uuid}"
+                )
+
+        elif duplicate_handling == "new":
+            if exists(data):
+                latest = cls.get(data["element"], data["name"], match_aliases=False)
+                data["version"] = latest.version + 1
+
+        else:
+            raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
+
+        pp = ECPPseudopotential(**data)
+        pp.set_extra("r0", r0)
+        return [pp]
 
     def to_cp2k(self, fhandle):
         """
@@ -473,8 +579,11 @@ def to_turborvb(self, fhandle):
         """
 
         if isinstance(self, ECPPseudopotential):
-            fhandle.write(f"GEN\n")
-            fhandle.write(f"1 0 {self.lmax}\n")
+            fhandle.write(f"ECP\n")
+            r0 = 0.0
+            if "r0" in self.extras:
+                r0 = self.extras["r0"]
+            fhandle.write(f"1 {r0:4.2f} {len(self.functions)}\n")
             fhandle.write(" ".join([ f"{len(x['polynoms'])}" for x in self.functions ]))
             fhandle.write("\n")
             for fun in self.functions:

From bff08713deb746169da3b96ee32c740200db0b6f Mon Sep 17 00:00:00 2001
From: Otto Kohulak <pravod@gmail.com>
Date: Wed, 23 Feb 2022 16:47:43 +0100
Subject: [PATCH 39/47] Add automatic tolerance detection

---
 .../pseudopotential/cli.py                    |  6 ++--
 .../pseudopotential/data.py                   | 32 +++++++++++++++++--
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py
index b0166d2..814bb0a 100644
--- a/aiida_gaussian_datatypes/pseudopotential/cli.py
+++ b/aiida_gaussian_datatypes/pseudopotential/cli.py
@@ -188,9 +188,11 @@ def list_pseudo(sym, name, tags):
                                                                     'gamess',
                                                                     'turborvb']), default='cp2k',
               help="Chose the output format for the pseudopotentials: " + ', '.join(['cp2k', ]))
+@click.option('-t', '--tolerance', type=float, default=1.0e-5,  # float: to_turborvb compares it numerically
+              help="set tolerance value for pseudo cutoff (default 1.0e-5, only for turborvb format)")
 @decorators.with_dbenv()
 # fmt: on
-def dump_pseudo(sym, name, tags, output_format, data):
+def dump_pseudo(sym, name, tags, output_format, data, tolerance):
     """
     Print specified Pseudopotentials
     """
@@ -232,4 +234,4 @@ def dump_pseudo(sym, name, tags, output_format, data):
         if echo.is_stdout_redirected():
             echo.echo_info("Dumping {}/{} ({})...".format(pseudo.name, pseudo.element, pseudo.uuid), err=True)
 
-        writers[output_format](pseudo, sys.stdout)
+        writers[output_format](pseudo, sys.stdout, tolerance = tolerance)
diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index 0cc110b..7751089 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -10,6 +10,7 @@
 from ..utils import SYM2NUM
 from decimal import Decimal
 from icecream import ic
+import numpy as np
 
 from aiida.common.exceptions import (
     MultipleObjectsError,
@@ -529,7 +530,7 @@ def exists(pseudo):
         pp.set_extra("r0", r0)
         return [pp]
 
-    def to_cp2k(self, fhandle):
+    def to_cp2k(self, fhandle, **kwargs):
         """
         Write this Pseudopotential instance to a file in CP2K format.
 
@@ -549,7 +550,7 @@ def to_cp2k(self, fhandle):
             """
             pass
 
-    def to_gamess(self, fhandle):
+    def to_gamess(self, fhandle, **kwargs):
         """
         Write this Pseudopotential instance to a file in Gamess format.
 
@@ -571,18 +572,43 @@ def to_gamess(self, fhandle):
             """
             pass
 
-    def to_turborvb(self, fhandle):
+    def to_turborvb(self, fhandle, tolerance = 1.0e-5):
         """
         Write this Pseudopotential instance to a file in TurboRVB format.
 
         :param fhandle: open file handle
+        :param tolerance: tolerance for pseudopotential
         """
+        def f(r, block):
+            nmax = len(block)
+            psip = np.zeros(nmax)
+            fun = 0.0
+            if r < 1.0e-9: r = 1.0e-9
+
+            for i in range(nmax):
+                psip[i] = np.exp(-block[i][2]*r*r + np.log(r)*block[i][1])
+
+            for i in range(nmax):
+                fun += psip[i] * block[i][0]
+
+            return fun/r/r
 
         if isinstance(self, ECPPseudopotential):
             fhandle.write(f"ECP\n")
             r0 = 0.0
             if "r0" in self.extras:
                 r0 = self.extras["r0"]
+            r0s = []
+            for fun in self.functions:
+                X = [ ii for ii in np.arange(0,10,0.01) ]
+                block = [ [prefactor, polynom, exponent] for prefactor, polynom, exponent in zip(*[ fun[k] for k in ("prefactors", "polynoms", "exponents")])]
+                Y = [ f(x, block) for x in X ]
+                for ii in reversed(range(len(X))):
+                    if Y[ii] > tolerance:
+                        r0s.append(X[ii])
+                        break
+            r0 = max(r0s)
+
             fhandle.write(f"1 {r0:4.2f} {len(self.functions)}\n")
             fhandle.write(" ".join([ f"{len(x['polynoms'])}" for x in self.functions ]))
             fhandle.write("\n")

From a7d96faf92240c9438133bb7e2fa668433682976 Mon Sep 17 00:00:00 2001
From: addman <addman@debian-BULLSEYE-live-builder-AMD64>
Date: Wed, 23 Feb 2022 18:53:38 +0100
Subject: [PATCH 40/47] WIP

---
 aiida_gaussian_datatypes/libraries.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
index 6556270..3ea4f42 100644
--- a/aiida_gaussian_datatypes/libraries.py
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -137,3 +137,25 @@ def add_data(p, tempdir, elements = elements):
 
         return elements
 
+@LibraryBookKeeper.register_library
+class BFDLibrary(_ExternalLibrary):
+
+    _URL = "http://burkatzki.com/pseudos/step4.2.php?format=gaussian&element={e}&basis={b}"
+
+    @classmethod
+    def fetch(cls):
+
+        from ase.data import chemical_symbols
+        from ase.data import atomic_numbers
+        from time import sleep
+
+        list_of_basis  =[ f"v{s}z" for s in "dtq56" ]
+        list_of_basis += [ f"{x}_ano" for x in list_of_basis ]
+
+        for b in list_of_basis:
+            for ie in range(1, 87):
+                l = cls._URL.format(b = b, e = chemical_symbols[ie])
+                to_file(urlopen(l).read(), ie, b)
+                """ Cool down """
+                sleep(0.5)
+

From 7370aac40a4f2593fd984c2dea923fe5542937b8 Mon Sep 17 00:00:00 2001
From: addman <addman@debian-BULLSEYE-live-builder-AMD64>
Date: Thu, 3 Mar 2022 08:28:25 +0100
Subject: [PATCH 41/47] should work

---
 aiida_gaussian_datatypes/basisset/cli.py      |   1 +
 aiida_gaussian_datatypes/basisset/data.py     | 117 +++++++++++++++++-
 aiida_gaussian_datatypes/libraries.py         |  57 ++++++++-
 .../pseudopotential/data.py                   | 103 +++++++++++++++
 4 files changed, 276 insertions(+), 2 deletions(-)

diff --git a/aiida_gaussian_datatypes/basisset/cli.py b/aiida_gaussian_datatypes/basisset/cli.py
index a299782..9256866 100644
--- a/aiida_gaussian_datatypes/basisset/cli.py
+++ b/aiida_gaussian_datatypes/basisset/cli.py
@@ -91,6 +91,7 @@ def import_basisset(basisset_file, fformat, sym, tags, duplicates, group):
     loaders = {
         "cp2k": BasisSet.from_cp2k,
         "nwchem": BasisSet.from_nwchem,
+        "gaussian": BasisSet.from_gaussian,
     }
 
     filters = {
diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py
index 27659bb..5b2f232 100644
--- a/aiida_gaussian_datatypes/basisset/data.py
+++ b/aiida_gaussian_datatypes/basisset/data.py
@@ -313,6 +313,121 @@ def decimal2str(val):
 
         return [cls(**bs) for bs in bsets]
 
+    @classmethod
+    def from_gaussian(cls, fhandle, filters=None, duplicate_handling="ignore", attrs = None):
+        """
+        Constructs a list with basis set objects from a Basis Set in Gaussian format
+
+        :param fhandle: open file handle, the element symbol is read from its second line
+        :param filters: a dict with attribute filter functions
+        :param duplicate_handling: how to handle duplicates ("ignore", "error", "new" (version))
+        :param attrs: optional dict, its "name", "n_el" and "tags" entries override the parsed ones
+        :rtype: list
+        """
+
+        def exists(bset):
+            try:
+                cls.get(bset["element"], bset["name"], match_aliases=False)
+            except NotExistent:
+                return False
+
+            return True
+
+        # Gaussian parser
+        # TODO Maybe parser should move to "parsers"
+
+        element = None
+        data = []
+        blocks = []
+
+        if not attrs:
+            attrs = {}
+
+        def block_creator(b, orb, blocks = blocks):
+            orb_dict = {"s" : 0,
+                        "p" : 1,
+                        "d" : 2,
+                        "f" : 3,
+                        "g" : 4,
+                        "h" : 5,
+                        "i" : 6 }
+            block = { "n": 0, # I dont know how to setup main quantum number
+                      "l": [(orb_dict[orb], 1)],
+                      "coefficients" : [ [ d["exp"], d["cont"] ] for d in b ] }
+            blocks.append(block)
+
+        orb = "x"
+        for ii, line in enumerate(fhandle):
+            if ii == 1:
+                element = line.lower().split()[0]
+                continue
+            if re.match(r"^[A-z ]+[0-9\. ]*$", line):
+                if len(data) != 0:
+                    block_creator(data, orb)
+                data = []
+                orb = line.lower().split()[0]
+            if re.match(r"^[+-.0-9 ]+$", line):
+                exp, cont, = [ float(x) for x in line.split() ]
+                data.append({"exp" : exp,
+                             "cont" : cont })
+        if len(data) != 0:
+            block_creator(data, orb)
+            data = []
+
+        try:
+            basis = {"element" : element.capitalize(),
+                     "version" : 1,
+                     "name" : "unknown",
+                     "tags" : [],
+                     "aliases" : [],
+                     "blocks" : blocks }
+        except AttributeError:
+            # element is still None: the input had fewer than two lines
+            return []
+
+        basis["name"] = "NA"
+
+        if hasattr(fhandle, "name"):
+            basis["name"] = Path(fhandle.name).name.replace(".nwchem", "")
+            basis["aliases"].append(basis["name"].split(".")[-1])
+
+        if "name" in attrs:
+            basis["aliases"].append(basis["name"])
+            basis["name"] = attrs["name"]
+
+        for attr in ("n_el", "tags",):
+            if attr in attrs:
+                basis[attr] = attrs[attr]
+
+        if len(basis["aliases"]) == 0:
+            del basis["aliases"]
+
+        if duplicate_handling == "force-ignore":  # It will check at the store stage
+            pass
+
+        elif duplicate_handling == "ignore":  # simply filter duplicates
+            if exists(basis):
+                return []
+
+        elif duplicate_handling == "error":
+            if exists(basis):
+                latest = cls.get(basis["element"], basis["name"], match_aliases=False)  # needed for the uuid below
+                raise UniquenessError( f"Gaussian Basis Set already exists for"
+                                       f" element={basis['element']}, name={basis['name']}: {latest.uuid}")
+
+        elif duplicate_handling == "new":
+            try:
+                latest = cls.get(basis["element"], basis["name"], match_aliases=False)
+            except NotExistent:
+                pass
+            else:
+                basis["version"] = latest.version + 1
+
+        else:
+            raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
+
+        return [cls(**basis)]
+
     @classmethod
     def from_nwchem(cls, fhandle, filters=None, duplicate_handling="ignore", attrs = None):
         """
@@ -366,7 +481,7 @@ def block_creator(b, orb, blocks = blocks):
                 if len(data) != 0:
                     block_creator(data, orb)
                     data = []
-                el, orb, = line.lower().split()
+                el, orb = line.lower().split()
                 if element is None:
                     """
                     TODO check validity of element
diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
index 3ea4f42..8e5c4a4 100644
--- a/aiida_gaussian_datatypes/libraries.py
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -4,6 +4,7 @@
 # Was there really a fish
 # That grants you that kind of wish
 #
+
 import os
 import re
 import git
@@ -147,7 +148,60 @@ def fetch(cls):
 
         from ase.data import chemical_symbols
         from ase.data import atomic_numbers
+        from urllib.request import urlopen
         from time import sleep
+        import io
+
+        elements = {}
+        def add_data(source, e, b):
+
+            source = str(source)
+            pat=re.compile("^.*?(" + e + "\s0.*$)",re.M|re.DOTALL)
+            x = pat.sub("\g<1>", source)
+            x = re.sub("\<br\s*/\>", "\n", x)
+            x = re.sub("\&nbsp", "", x)
+            x = re.sub("\&nbsp", "", x)
+            x = re.sub(".*html.*$", "", x)
+            pat=re.compile("^.*?(" + e + "\s0.*)("+e+" 0.*)$",re.M|re.DOTALL)
+            m = pat.match(x)
+            if(m):
+                bas = m.group(1)
+                ecp = m.group(2)
+                if len(bas) < 15: return
+                typ = "BFD"
+                pseudo, = Pseudopotential.from_gaussian(io.StringIO(ecp),
+                                                        duplicate_handling = "force-ignore",
+                                                        attrs = {"name" : f"{typ}" })
+
+                basisset, = BasisSet.from_gaussian(io.StringIO(f"\n{bas}"),
+                                                   duplicate_handling = "force-ignore",
+                                                   attrs = {"name" : f"{typ}-{b}" })
+                pseudos = [{"path": "",
+                            "obj": pseudo}]
+                version = 1
+                pseudo.attributes["version"] = version
+
+                tags = []
+                tags.append(f"q{pseudo.n_el_tot}")
+                tags.append(f"c{pseudo.core_electrons}")
+                pseudo.tags.extend(tags)
+
+                if e not in elements:
+                    elements[e] = {"path": "",
+                                   "types": {}}
+
+                if typ not in elements[e]["types"]:
+                    elements[e]["types"][typ] = {"path": "",
+                                                 "basis": [],
+                                                 "pseudos": pseudos,
+                                                 "tags": []}
+                elements[e]["types"][typ]["tags"].extend(tags)
+                elements[e]["types"][typ]["basis"].append({"path" : f"http://burkatzki.com|{b}",
+                                                           "obj"  : basisset})
+                elements[e]["types"][typ]["tags"].append("BFD")
+                tt = set(elements[e]["types"][typ]["tags"])
+                elements[e]["types"][typ]["tags"] = list(tt)
+
 
         list_of_basis  =[ f"v{s}z" for s in "dtq56" ]
         list_of_basis += [ f"{x}_ano" for x in list_of_basis ]
@@ -155,7 +209,8 @@ def fetch(cls):
         for b in list_of_basis:
             for ie in range(1, 87):
                 l = cls._URL.format(b = b, e = chemical_symbols[ie])
-                to_file(urlopen(l).read(), ie, b)
+                add_data(urlopen(l).read(), chemical_symbols[ie], b)
                 """ Cool down """
                 sleep(0.5)
 
+        return elements
diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index 0cc110b..da8074f 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -319,6 +319,108 @@ def decimal2str(val):
 
         return [GTHPseudopotential(**p) for p in pseudos]
 
+    @classmethod
+    def from_gaussian(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False, attrs = None):
+        """
+        Constructs a list with pseudopotential objects from a Pseudopotential in Gaussian format
+
+        :param fhandle: open file handle
+        :param filters: a dict with attribute filter functions (currently not used by this parser)
+        :param duplicate_handling: how to handle duplicates ("ignore", "error", "new" (version), "force-ignore")
+        :param ignore_invalid: whether to ignore invalid entries silently (currently not used by this parser)
+        :param attrs: optional dict; a "name" entry replaces the parsed name, which is then kept as an alias
+        :rtype: list
+        """
+
+        def exists(pseudo):
+            try:
+                cls.get(pseudo["element"], pseudo["name"], match_aliases=False)
+            except NotExistent:
+                return False
+
+            return True
+
+        attrs = attrs or {}  # normalise a missing/empty attrs argument to an empty dict
+
+        """
+        Parser for Gaussian format
+        """
+
+        was_comment_line = 0
+        functions = []
+        for ii, line in enumerate(fhandle):
+            if len(line.strip()) == 0: continue
+            if ii == 0:
+                element, n, = line.split()
+                continue
+            if ii == 1:
+                qmc, n, core_electrons, = line.split()
+                continue
+            if was_comment_line == -1:
+                was_comment_line = int(line.strip())
+            if was_comment_line == 0:
+                functions.append({"prefactors" : [],
+                                  "polynoms"   : [],
+                                  "exponents"  : []})
+            else:
+                was_comment_line -= 1
+                functions[-1]["exponents"].append(int(line.strip()[0]))
+                functions[-1]["polynoms"].append(float(line.strip()[1]))
+                functions[-1]["prefactors"].append(float(line.strip()[2]))
+
+        """
+        Change the order of functions so they match orbital momentum
+
+        In Gaussian format first block represents upper most lmax
+        and then the rest s, p, d, ...
+        """
+        functions = functions[1:] + [functions[0]]
+
+        """
+        TODO properly extract name
+        """
+
+        lmax = len(functions) - 1
+        core_electrons = int(core_electrons)
+
+        data = {"functions"      : functions,
+                "element"        : element,
+                "aliases"        : [qmc],
+                "name"           : qmc,
+                "core_electrons" : core_electrons,
+                "lmax"           : lmax,
+                "version"        : 1,
+                "n_el"           : None,
+                "n_el_tot"       : SYM2NUM[element] - core_electrons}
+
+        if "name" in attrs:
+            data["aliases"].append(data["name"])
+            data["name"] = attrs["name"]
+
+        if duplicate_handling == "force-ignore":  # This will be checked at the store stage
+            pass
+
+        elif duplicate_handling == "ignore":  # simply filter duplicates
+            if exists(data):
+                return []
+
+        elif duplicate_handling == "error":
+            if exists(data):
+                latest = cls.get(data["element"], data["name"], match_aliases=False)  # needed for the UUID below
+                raise UniquenessError(
+                    f"Gaussian Pseudopotential already exists for"
+                    f" element={data['element']}, name={data['name']}: {latest.uuid}")
+
+        elif duplicate_handling == "new":
+            if exists(data):
+                latest = cls.get(data["element"], data["name"], match_aliases=False)
+                data["version"] = latest.version + 1
+
+        else:
+            raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
+
+        return [ECPPseudopotential(**data)]
+
     @classmethod
     def from_gamess(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False, attrs = None):
         """
@@ -422,6 +524,7 @@ def exists(pseudo):
             raise ValueError(f"Specified duplicate handling strategy not recognized: '{duplicate_handling}'")
 
         return [ECPPseudopotential(**data)]
+
     @classmethod
     def from_turborvb(cls, fhandle, filters=None, duplicate_handling="ignore", ignore_invalid=False, attrs = None, name = None):
         """

From 04d7a0c698d90635a9c250a95ce61e1b5cbc7e55 Mon Sep 17 00:00:00 2001
From: addman <addman@debian-BULLSEYE-live-builder-AMD64>
Date: Thu, 3 Mar 2022 10:03:47 +0100
Subject: [PATCH 42/47] UPD cli

---
 aiida_gaussian_datatypes/fetcher/cli.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/aiida_gaussian_datatypes/fetcher/cli.py b/aiida_gaussian_datatypes/fetcher/cli.py
index ad43b08..37fc395 100644
--- a/aiida_gaussian_datatypes/fetcher/cli.py
+++ b/aiida_gaussian_datatypes/fetcher/cli.py
@@ -69,13 +69,21 @@ def __new__(cls, num, element, t, p, tags, b):
                 p = ""
                 tags = []
 
+            name = ""
+            m = re.match(r"http://burkatzki\.com\|(\w+)", b)  # \w+ keeps digits/underscores, e.g. "v5z_ano"
+            if m:
+                name = m.group(1)
+            m = re.match(r"[A-z]{1,2}\.(.+).nwchem", b)  # raw string: no invalid-escape warning
+            if m:
+                name = m.group(1)
+
             return (
                 num,
                 element,
                 t,
                 p,
-                " ".join(tags),
-                re.match("[A-z]{1,2}\.(.+).nwchem", b).group(1),
+                " ".join(sorted(tags)),
+                name,
                 b
             )
 
@@ -86,9 +94,14 @@ def __new__(cls, num, element, t, p, tags, b):
                 continue
             p = d["types"][t]["pseudos"][0]
             for b in d["types"][t]["basis"]:
-                table_content.append(row(ii, e, t, p["path"].name,
+                name = ""
+                if isinstance(b["path"], str):
+                    name = b["path"]
+                if hasattr(b["path"], "name"):
+                    name = b["path"].name
+                table_content.append(row(ii, e, t, name,
                                          d["types"][t]["tags"],
-                                         b["path"].name))
+                                         name))
 
     #table_content = [row(n, p, v) for n, (p, v) in enumerate(elements.items())]
     return tabulate.tabulate(table_content, headers=["Nr.", "Element", "Type", "PseudoFile", "Tags", "Basis", "BasisFile"])

From 4d3944879b95082b5bb47d8382f9acae3928a957 Mon Sep 17 00:00:00 2001
From: addman <addman@debian-BULLSEYE-live-builder-AMD64>
Date: Wed, 27 Apr 2022 11:40:23 +0200
Subject: [PATCH 43/47] Clean uncontract

---
 aiida_gaussian_datatypes/calc/__init__.py   | 0
 aiida_gaussian_datatypes/calc/uncontract.py | 1 -
 2 files changed, 1 deletion(-)
 create mode 100644 aiida_gaussian_datatypes/calc/__init__.py

diff --git a/aiida_gaussian_datatypes/calc/__init__.py b/aiida_gaussian_datatypes/calc/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/aiida_gaussian_datatypes/calc/uncontract.py b/aiida_gaussian_datatypes/calc/uncontract.py
index cdc7124..6bc4654 100644
--- a/aiida_gaussian_datatypes/calc/uncontract.py
+++ b/aiida_gaussian_datatypes/calc/uncontract.py
@@ -2,7 +2,6 @@
 
 from aiida.plugins import DataFactory
 from aiida.engine import calcfunction
-from icecream import ic
 """
 
 """

From 316549840f00543b72ac283703853637309114a1 Mon Sep 17 00:00:00 2001
From: addman <addman@debian-BULLSEYE-live-builder-AMD64>
Date: Wed, 27 Apr 2022 11:56:23 +0200
Subject: [PATCH 44/47] Clean pseudopotential

---
 aiida_gaussian_datatypes/pseudopotential/cli.py |  2 +-
 .../pseudopotential/data.py                     | 17 +++++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/aiida_gaussian_datatypes/pseudopotential/cli.py b/aiida_gaussian_datatypes/pseudopotential/cli.py
index 814bb0a..70ef2ce 100644
--- a/aiida_gaussian_datatypes/pseudopotential/cli.py
+++ b/aiida_gaussian_datatypes/pseudopotential/cli.py
@@ -188,7 +188,7 @@ def list_pseudo(sym, name, tags):
                                                                     'gamess',
                                                                     'turborvb']), default='cp2k',
               help="Chose the output format for the pseudopotentials: " + ', '.join(['cp2k', ]))
-@click.option('-t', '--tolerance', type=str, default=1.0e-5,
+@click.option('-r', '--tolerance', type=float, default=1.0e-5,
               help="set tolerance value for pseudo cutoff (default 1.0e-5, only for turborvb format)")
 @decorators.with_dbenv()
 # fmt: on
diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index 42074c2..1f091da 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -347,22 +347,23 @@ def exists(pseudo):
         Parser for Gaussian format
         """
 
-        was_comment_line = 0
+        was_comment_line = 2
         functions = []
+        functions.append({"prefactors" : [],
+                          "polynoms"   : [],
+                          "exponents"  : []})
         for ii, line in enumerate(fhandle):
-            if len(line.strip()) == 0: continue
+            ic(line.strip())
             if ii == 0:
                 element, n, = line.split()
                 continue
             if ii == 1:
                 qmc, n, core_electrons, = line.split()
                 continue
+            if ii == 2:
+                continue
             if was_comment_line == -1:
                 was_comment_line = int(line.strip())
-            if was_comment_line == 0:
-                functions.append({"prefactors" : [],
-                                  "polynoms"   : [],
-                                  "exponents"  : []})
             else:
                 was_comment_line -= 1
                 functions[-1]["exponents"].append(int(line.strip()[0]))
@@ -675,7 +676,7 @@ def to_gamess(self, fhandle, **kwargs):
             """
             pass
 
-    def to_turborvb(self, fhandle, tolerance = 1.0e-5):
+    def to_turborvb(self, fhandle, tolerance = 1.0e-5, index = 1, **kwargs):
         """
         Write this Pseudopotential instance to a file in TurboRVB format.
 
@@ -712,7 +713,7 @@ def f(r, block):
                         break
             r0 = max(r0s)
 
-            fhandle.write(f"1 {r0:4.2f} {len(self.functions)}\n")
+            fhandle.write(f"{index} {r0:4.2f} {len(self.functions)}\n")
             fhandle.write(" ".join([ f"{len(x['polynoms'])}" for x in self.functions ]))
             fhandle.write("\n")
             for fun in self.functions:

From 4cd04e06f1f7966de3c3323e8bb68be1ade4703e Mon Sep 17 00:00:00 2001
From: addman <addman@debian-BULLSEYE-live-builder-AMD64>
Date: Fri, 29 Apr 2022 08:16:35 +0200
Subject: [PATCH 45/47] Fix bugs in BFD downloader

---
 aiida_gaussian_datatypes/libraries.py         | 10 ++++---
 .../pseudopotential/data.py                   | 27 ++++++++++++-------
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/aiida_gaussian_datatypes/libraries.py b/aiida_gaussian_datatypes/libraries.py
index 8e5c4a4..4e58933 100644
--- a/aiida_gaussian_datatypes/libraries.py
+++ b/aiida_gaussian_datatypes/libraries.py
@@ -13,7 +13,6 @@
 import pydriller
 from aiida_gaussian_datatypes import utils
 from typing import Dict, Generic, List, Optional, Sequence, Type, TypeVar
-from icecream import ic
 from .basisset.data import BasisSet
 from .pseudopotential.data import Pseudopotential, ECPPseudopotential
 
@@ -173,9 +172,14 @@ def add_data(source, e, b):
                                                         duplicate_handling = "force-ignore",
                                                         attrs = {"name" : f"{typ}" })
 
+                tags = [typ]
+                if "ano" in b:
+                    tags.append("ANO")
                 basisset, = BasisSet.from_gaussian(io.StringIO(f"\n{bas}"),
                                                    duplicate_handling = "force-ignore",
-                                                   attrs = {"name" : f"{typ}-{b}" })
+                                                   attrs = {"name" : f"{typ}-{b}",
+                                                            "n_el" : pseudo.n_el_tot,
+                                                            "tags" : tags})
                 pseudos = [{"path": "",
                             "obj": pseudo}]
                 version = 1
@@ -207,7 +211,7 @@ def add_data(source, e, b):
         list_of_basis += [ f"{x}_ano" for x in list_of_basis ]
 
         for b in list_of_basis:
-            for ie in range(1, 87):
+            for ie in range(1, 86):
                 l = cls._URL.format(b = b, e = chemical_symbols[ie])
                 add_data(urlopen(l).read(), chemical_symbols[ie], b)
                 """ Cool down """
diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index 1f091da..25f71e7 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -10,6 +10,7 @@
 from ..utils import SYM2NUM
 from decimal import Decimal
 from icecream import ic
+import re
 import numpy as np
 
 from aiida.common.exceptions import (
@@ -347,11 +348,8 @@ def exists(pseudo):
         Parser for Gaussian format
         """
 
-        was_comment_line = 2
+        block_counter = 0
         functions = []
-        functions.append({"prefactors" : [],
-                          "polynoms"   : [],
-                          "exponents"  : []})
         for ii, line in enumerate(fhandle):
             ic(line.strip())
             if ii == 0:
@@ -362,13 +360,22 @@ def exists(pseudo):
                 continue
             if ii == 2:
                 continue
-            if was_comment_line == -1:
-                was_comment_line = int(line.strip())
             else:
-                was_comment_line -= 1
-                functions[-1]["exponents"].append(int(line.strip()[0]))
-                functions[-1]["polynoms"].append(float(line.strip()[1]))
-                functions[-1]["prefactors"].append(float(line.strip()[2]))
+                ic(block_counter)
+                if block_counter == 0:
+                    if line.strip() == "":
+                        continue
+                    m = re.match(r"[ ]*([0-9]+)[ ]*$", line)  # capture the whole count, not only its last digit
+                    if m:
+                        block_counter = int(m.group(1))
+                        functions.append({"prefactors" : [],
+                                          "polynoms"   : [],
+                                          "exponents"  : []})
+                else:
+                    functions[-1]["polynoms"].append(int(line.strip().split()[0]))
+                    functions[-1]["exponents"].append(float(line.strip().split()[1]))
+                    functions[-1]["prefactors"].append(float(line.strip().split()[2]))
+                    block_counter -= 1
 
         """
         Change the order of functions so they match orbital momentum

From 24e729d59cf3f6f8cafc89e8f6b5fda61a25d2e1 Mon Sep 17 00:00:00 2001
From: Otto Kohulak <pravod@gmail.com>
Date: Tue, 8 Nov 2022 12:11:20 +0100
Subject: [PATCH 46/47] Suppress warning

---
 aiida_gaussian_datatypes/basisset/data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aiida_gaussian_datatypes/basisset/data.py b/aiida_gaussian_datatypes/basisset/data.py
index b942cfa..77f6a52 100644
--- a/aiida_gaussian_datatypes/basisset/data.py
+++ b/aiida_gaussian_datatypes/basisset/data.py
@@ -330,12 +330,12 @@ def block_creator(b, orb, blocks = blocks):
             if ii == 1:
                 element = line.lower().split()[0]
                 continue
-            if re.match("^[A-z ]+[0-9\. ]*$", line):
+            if re.match(r"^[A-z ]+[0-9\. ]*$", line):
                 if len(data) != 0:
                     block_creator(data, orb)
                 data = []
                 orb = line.lower().split()[0]
-            if re.match("^[+-.0-9 ]+$", line):
+            if re.match(r"^[+-.0-9 ]+$", line):
                 exp, cont, = [ float(x) for x in line.split() ]
                 data.append({"exp" : exp,
                              "cont" : cont })

From 3f79b28123312d2e39d31c211a8bd05382c47bfa Mon Sep 17 00:00:00 2001
From: Otto Kohulak <pravod@gmail.com>
Date: Wed, 9 Nov 2022 14:27:25 +0100
Subject: [PATCH 47/47] Fix warning and errors in tests

---
 aiida_gaussian_datatypes/pseudopotential/data.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/aiida_gaussian_datatypes/pseudopotential/data.py b/aiida_gaussian_datatypes/pseudopotential/data.py
index e798fd1..11de26f 100644
--- a/aiida_gaussian_datatypes/pseudopotential/data.py
+++ b/aiida_gaussian_datatypes/pseudopotential/data.py
@@ -523,9 +523,9 @@ def from_turborvb(cls, fhandle, filters=None, duplicate_handling="ignore", ignor
 
         if hasattr(fhandle, "name"):
             import re
-            if re.match("Z[0-9]{1,2}\_atomnumber[0-9]{1,2}\.[A-z]+",
+            if re.match(r"Z[0-9]{1,2}\_atomnumber[0-9]{1,2}\.[A-z]+",
                         fhandle.name):
-                ret = re.match("Z[0-9]{1,2}\_atomnumber([0-9]{1,2})\.[A-z]+",
+                ret = re.match(r"Z[0-9]{1,2}\_atomnumber([0-9]{1,2})\.[A-z]+",
                                fhandle.name)
                 atnum = int(ret.group(1))
                 element = list(SYM2NUM.keys())[list(SYM2NUM.values()).index(atnum)]
@@ -888,6 +888,6 @@ def _pseudodata2dict(data: PseudopotentialData) -> Dict[str, Any]:
 
 
 def _dict2pseudodata(data: Dict[str, Any]) -> PseudopotentialData:
-    obj = {k: v for k, v in data.items() if k not in ("name", "tags", "version")}
+    obj = {k: v for k, v in data.items() if k not in ("name", "tags", "version", "n_el_tot")}
     obj["identifiers"] = obj.pop("aliases")
     return PseudopotentialData.parse_obj(obj)