From d71d87343233030b44126b5dacb6f3068684ed27 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Thu, 2 May 2024 12:44:04 +0100 Subject: [PATCH 01/20] feat(annotations): use bags for entries with multiple values --- pyneuroml/annotations.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pyneuroml/annotations.py b/pyneuroml/annotations.py index 6a1e034c..9e59d61c 100644 --- a/pyneuroml/annotations.py +++ b/pyneuroml/annotations.py @@ -417,14 +417,20 @@ def _add_element( if annotation_style not in ["biosimulations", "miriam"]: raise ValueError("Annotation style must either be 'miriam' or 'biosimulations'") - for idf, label in info.items(): - top_node = BNode() - doc.add((subjectobj, node_type, top_node)) - if annotation_style == "biosimulations": + # for biosimulations, we do not use bags + if annotation_style == "biosimulations": + for idf, label in info.items(): + # add a top level node + top_node = BNode() + doc.add((subjectobj, node_type, top_node)) doc.add((top_node, DC.identifier, URIRef(idf))) doc.add((top_node, RDFS.label, Literal(label))) - elif annotation_style == "miriam": - Bag(doc, top_node, [URIRef(idf)]) + elif annotation_style == "miriam": + top_node = BNode() + doc.add((subjectobj, node_type, top_node)) + bag = Bag(doc, top_node, []) + for idf, label in info.items(): + bag.append(URIRef(idf)) def _add_humans( From 7f7cdfbefe740ddc826af1efeabf73ed4760bcbd Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Thu, 2 May 2024 14:30:48 +0100 Subject: [PATCH 02/20] feat(annotations): update creation code For MIRIAM, any entry that can have multiples is always put in a bag. For clarity, all humans are noted as resources and have their own entries with other FOAF metadata. --- pyneuroml/annotations.py | 111 +++++++++++++++++++++++++------------- tests/test_annotations.py | 2 +- 2 files changed, 74 insertions(+), 39 deletions(-) diff --git a/pyneuroml/annotations.py b/pyneuroml/annotations.py index 9e59d61c..d55426fc 100644 --- a/pyneuroml/annotations.py +++ b/pyneuroml/annotations.py @@ -21,7 +21,7 @@ try: from rdflib import BNode, Graph, Literal, Namespace, URIRef, Bag - from rdflib.namespace import DC, DCTERMS, FOAF, RDFS + from rdflib.namespace import DC, DCTERMS, FOAF, RDFS, RDF except ImportError: logger.warning("Please install optional dependencies to use annotation features:") logger.warning("pip install pyneuroml[annotations]") @@ -276,11 +276,12 @@ def create_annotation( if description: doc.add((subjectobj, DC.description, Literal(description))) if keywords: - for k in keywords: - doc.add((subjectobj, PRISM.keyword, Literal(k))) + _add_element(doc, subjectobj, keywords, PRISM.keyword, annotation_style) if thumbnails: - for t in thumbnails: - doc.add((subjectobj, COLLEX.thumbnail, URIRef(f"{fileprefix}/{t}"))) + prefixed = [ + f"{fileprefix}/{t}" if (not t.startswith("http")) else t for t in thumbnails + ] + _add_element(doc, subjectobj, prefixed, COLLEX.thumbnail, annotation_style) if organisms: doc.bind("bqbiol:hasTaxon", BQBIOL + "/hasTaxon") _add_element(doc, subjectobj, organisms, BQBIOL.hasTaxon, annotation_style) @@ -397,7 +398,7 @@ def create_annotation( def _add_element( doc: Graph, subjectobj: typing.Union[URIRef, Literal], - info: typing.Dict[str, str], + info_dict: typing.Union[typing.Iterable[str], typing.Dict[str, str]], node_type: URIRef, annotation_style: str, ): @@ -407,8 +408,8 @@ def _add_element( :type doc: RDF.Graph :param subjectobj: main object being referred to :type subjectobj: URIRef or Literal - :param info: dictionary of entries and their labels - :type info: dict + :param info_dict: dictionary of entries and their labels, or Iterable if no labels + :type info_dict: dict or Iterable :param node_type: node type :type node_type: URIRef :param annotation_style: type of annotation @@ -417,20 +418,31 @@ def _add_element( if annotation_style not in ["biosimulations", "miriam"]: raise ValueError("Annotation style must either be 'miriam' or 'biosimulations'") + # if not a dict, try to create a dict with blank values + if not isinstance(info_dict, dict): + copy_dict = {} # type: typing.Dict[str, str] + for i in info_dict: + copy_dict[i] = "" + info_dict = copy_dict + # for biosimulations, we do not use bags if annotation_style == "biosimulations": - for idf, label in info.items(): + for idf, label in info_dict.items(): # add a top level node top_node = BNode() doc.add((subjectobj, node_type, top_node)) doc.add((top_node, DC.identifier, URIRef(idf))) - doc.add((top_node, RDFS.label, Literal(label))) + if len(label) > 0: + doc.add((top_node, RDFS.label, Literal(label))) elif annotation_style == "miriam": + # even if there's only one entry, we still create a bag. + # this seems to be the norm in the SBML examples + # https://raw.githubusercontent.com/combine-org/combine-specifications/main/specifications/files/sbml.level-3.version-2.core.release-2.pdf top_node = BNode() doc.add((subjectobj, node_type, top_node)) bag = Bag(doc, top_node, []) - for idf, label in info.items(): - bag.append(URIRef(idf)) + for idf, label in info_dict.items(): + bag.append(_URIRef_or_Literal(idf)) def _add_humans( @@ -459,37 +471,60 @@ def _add_humans( if annotation_style not in ["biosimulations", "miriam"]: raise ValueError("Annotation style must either be 'miriam' or 'biosimulations'") - if isinstance(info_dict, dict): + # if not a dict, create a dict with blank values + if not isinstance(info_dict, dict): + copy_dict = {} # type: typing.Dict[str, typing.Dict] + for i in info_dict: + copy_dict[i] = {} + info_dict = copy_dict + + if annotation_style == "biosimulations": for name, info in info_dict.items(): top_node = BNode() doc.add((subjectobj, node_type, top_node)) - # add name - if annotation_style == "biosimulations": - doc.add((top_node, FOAF.name, Literal(name))) - doc.add((top_node, RDFS.label, Literal(name))) - elif annotation_style == "miriam": - bag = Bag(doc, top_node, [Literal(name)]) + doc.add((top_node, FOAF.name, Literal(name))) + doc.add((top_node, RDFS.label, Literal(name))) # other fields for idf, label in info.items(): - if annotation_style == "biosimulations": - if label == "accountname": - doc.add((top_node, FOAF.accountName, URIRef(idf))) - else: - doc.add((top_node, DC.identifier, URIRef(idf))) - elif annotation_style == "miriam": - if idf.startswith("http:"): - bag.append(URIRef(idf)) - else: - bag.append(Literal(idf)) + try: + foaf_type = getattr(FOAF, label) + except AttributeError: + logger.info("Not a FOAF attribute, using DC.identifier") + foaf_type = DC.identifier + doc.add((top_node, foaf_type, _URIRef_or_Literal(idf))) + elif annotation_style == "miriam": + # top level node: creator/contributor etc. + top_node = BNode() + doc.add((subjectobj, node_type, top_node)) + + for name, info in info_dict.items(): + # individual references in a list + ref = URIRef(f"#{name.replace(' ', '_')}") + bag = Bag(doc, top_node, []) + bag.append(ref) + + # individual nodes for details + doc.add((ref, FOAF.name, Literal(name))) + for idf, label in info.items(): + try: + foaf_type = getattr(FOAF, label) + except AttributeError: + logger.info("Not a FOAF attribute, using DC.identifier") + foaf_type = DC.identifier + doc.add((ref, foaf_type, _URIRef_or_Literal(idf))) + + +def _URIRef_or_Literal(astr: str) -> typing.Union[URIRef, Literal]: + """Create a URIRef or Literal depending on string. + + :param astr: a string to create URIRef or Literal for + :type astr: str + :returns: a URIRef or Literal + + """ + if astr.startswith("http:"): + return URIRef(astr) else: - for name in info_dict: - top_node = BNode() - doc.add((subjectobj, node_type, top_node)) - # add name - if annotation_style == "biosimulations": - doc.add((top_node, FOAF.name, Literal(name))) - doc.add((top_node, RDFS.label, Literal(name))) - elif annotation_style == "miriam": - bag = Bag(doc, top_node, [Literal(name)]) + return Literal(astr) diff --git a/tests/test_annotations.py b/tests/test_annotations.py index 7f7511f1..56779f25 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -45,7 +45,7 @@ def test_create_annotation(self): "modified_dates": ["2024-04-18", "2024-04-19"], "authors": { "John Doe": { - "https://someurl.com": "orcid", + "https://someurl.com": "homepage", "https://anotherurl": "github", }, "Jane Smith": {}, From 6cb69336ae842b777fd18fae4543862e0c728df2 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Thu, 2 May 2024 16:58:49 +0100 Subject: [PATCH 03/20] feat(annotations): refactor code --- pyneuroml/annotations.py | 974 ++++++++++++++++++++------------------ tests/test_annotations.py | 91 ++-- 2 files changed, 564 insertions(+), 501 deletions(-) diff --git a/pyneuroml/annotations.py b/pyneuroml/annotations.py index d55426fc..72e3d82c 100644 --- a/pyneuroml/annotations.py +++ b/pyneuroml/annotations.py @@ -14,7 +14,7 @@ import textwrap from lxml import etree -from pyneuroml.utils.xml import _find_elements, _get_attr_in_element +from pyneuroml.utils.xml import _find_elements logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -28,11 +28,6 @@ # From https://docs.biosimulations.org/concepts/conventions/simulation-project-metadata/ -PREDICATES_MAP = { - "keyword": "http://prismstandard.org/namespaces/basic/2.0/keyword", - "thumbnail": "http://www.collex.org/schema#thumbnail", -} - # From https://doi.org/10.1515/jib-2021-0020 (page 3) # rdf, foaf, dc already included in rdflib NAMESPACES_MAP = { @@ -53,467 +48,503 @@ } -def extract_annotations(nml2_file: str) -> None: - """Extract and print annotations from a NeuroML 2 file. +# create namespaces not included in rdflib +PRISM = Namespace(NAMESPACES_MAP["prism"]) +COLLEX = Namespace(NAMESPACES_MAP["collex"]) +BQBIOL = Namespace(NAMESPACES_MAP["bqbiol"]) +BQMODEL = Namespace(NAMESPACES_MAP["bqmodel"]) +SCORO = Namespace(NAMESPACES_MAP["scoro"]) + + +class Annotation(object): + """For handling NeuroML annotations""" + + # extra mappings + P_MAP_EXTRA = { + "hasTaxon": "bqbiol", + "encodes": "bqbiol", + "hasVersion": "bqbiol", + "isVersionOf": "bqbiol", + "hasPart": "bqbiol", + "isPartOf": "bqbiol", + "hasProperty": "bqbiol", + "isPropertyOf": "bqbiol", + "isInstanceOf": "bqmodel", + "hasInstance": "bqmodel", + "isDerivedFrom": "bqmodel", + "successor": "scoro", + "is": "bqmodel", + "isDescribedBy": "bqmodel", + "funder": "scoro", + } + + def __init__(self): + self.doc = Graph() + + # namespaces not in rdflib + self.doc.bind("prism", PRISM) + self.doc.bind("collex", COLLEX) + self.doc.bind("bqbiol", BQBIOL) + self.doc.bind("bqmodel", BQMODEL) + self.doc.bind("scoro", SCORO) + + for k, v in self.P_MAP_EXTRA.items(): + self.doc.bind(f"{v}:{k}", f"v.upper()/{k}") + + self.ARG_MAP = { + "title": DC.title, + "abstract": DCTERMS.abstract, + "description": DC.description, + "keyword": PRISM.keyword, + "thumbnail": COLLEX.thumbnail, + "organisms": BQBIOL.hasTaxon, + "encodes_other_biology": BQBIOL.encodes, + "has_version": BQBIOL.hasVersion, + "is_version_of": BQBIOL.isVersionOf, + "has_part": BQBIOL.hasPart, + "is_part_of": BQBIOL.isPartOf, + "has_property": BQBIOL.hasProperty, + "is_property_of": BQBIOL.isPropertyOf, + "sources": DC.source, + "is_instance_of": BQMODEL.isInstanceOf, + "has_instance": BQMODEL.hasInstance, + "predecessors": BQMODEL.isDerivedFrom, + "successors": SCORO.successor, + "see_also": RDFS.seeAlso, + "references": DCTERMS.references, + "other_ids": BQMODEL.IS, + "citations": BQMODEL.isDescribedBy, + "license": DCTERMS.license, + "funders": SCORO.funder, + "authors": DC.creator, + "contributors": DC.contributor, + "creation_date": DCTERMS.created, + "modified_dates": DCTERMS.modified, + } - :param nml2_file: name of NeuroML2 file to parse - :type nml2_file: str - """ - pp = pprint.PrettyPrinter() - test_file = open(nml2_file) - root = etree.parse(test_file).getroot() - annotations = {} # type: dict - - for a in _find_elements(root, "annotation"): - for r in _find_elements(a, "Description", rdf=True): - desc = _get_attr_in_element(r, "about", rdf=True) - annotations[desc] = [] - - for info in r: - if isinstance(info.tag, str): - kind = info.tag.replace( - "{http://biomodels.net/biology-qualifiers/}", "bqbiol:" - ) - kind = kind.replace( - "{http://biomodels.net/model-qualifiers/}", "bqmodel:" + def create_annotation( + self, + subject: str, + title: typing.Optional[str] = None, + abstract: typing.Optional[str] = None, + annotation_style: typing.Literal["miriam", "biosimulations"] = "biosimulations", + serialization_format: str = "pretty-xml", + write_to_file: typing.Optional[str] = None, + xml_header: bool = True, + indent: int = 12, + description: typing.Optional[str] = None, + keywords: typing.Optional[typing.List[str]] = None, + thumbnails: typing.Optional[typing.List[str]] = None, + organisms: typing.Optional[typing.Dict[str, str]] = None, + encodes_other_biology: typing.Optional[typing.Dict[str, str]] = None, + has_version: typing.Optional[typing.Dict[str, str]] = None, + is_version_of: typing.Optional[typing.Dict[str, str]] = None, + has_part: typing.Optional[typing.Dict[str, str]] = None, + is_part_of: typing.Optional[typing.Dict[str, str]] = None, + has_property: typing.Optional[typing.Dict[str, str]] = None, + is_property_of: typing.Optional[typing.Dict[str, str]] = None, + sources: typing.Optional[typing.Dict[str, str]] = None, + is_instance_of: typing.Optional[typing.Dict[str, str]] = None, + has_instance: typing.Optional[typing.Dict[str, str]] = None, + predecessors: typing.Optional[typing.Dict[str, str]] = None, + successors: typing.Optional[typing.Dict[str, str]] = None, + see_also: typing.Optional[typing.Dict[str, str]] = None, + references: typing.Optional[typing.Dict[str, str]] = None, + other_ids: typing.Optional[typing.Dict[str, str]] = None, + citations: typing.Optional[typing.Dict[str, str]] = None, + authors: typing.Optional[ + typing.Union[typing.Dict[str, typing.Dict[str, str]], typing.Set] + ] = None, + contributors: typing.Optional[ + typing.Union[typing.Dict[str, typing.Dict[str, str]], typing.Set] + ] = None, + license: typing.Optional[typing.Dict[str, str]] = None, + funders: typing.Optional[typing.Dict[str, str]] = None, + creation_date: typing.Optional[str] = None, + modified_dates: typing.Optional[typing.List[str]] = None, + ): + """Create an RDF annotation from the provided fields + + .. versionadded:: 1.2.10 + + This can be used to create an RDF annotation for a subject---a model or a + file like an OMEX archive file. It supports most qualifiers and will be + continuously updated to support more as they are added. + + It merely uses rdflib to make life easier for users to create annotations + by coding in the various predicates for each subject. + + For information on the specifications, see: + + - COMBINE specifications: https://github.com/combine-org/combine-specifications/blob/main/specifications/qualifiers-1.1.md + - Biosimulations guidelines: https://docs.biosimulations.org/concepts/conventions/simulation-project-metadata/ + - MIRIAM guidelines: https://drive.google.com/file/d/1JqjcH0T0UTWMuBj-scIMwsyt2z38A0vp/view + + + Note that: + + - not all qualifiers have been included yet + - the qualifiers and their representations may change in the future + + For any arguments here that take a dictionary of strings, the key is the + resource reference URI, and the value is the string label. For example: + + .. code-block:: python + + encodes_other_biology={ + "http://identifiers.org/GO:0009653": "anatomical structure morphogenesis", + "http://identifiers.org/kegg:ko04111": "Cell cycle - yeast", + } + + :param subject: subject/target of the annotation + could be a file, a mode component + :type subject: str + :param title: title of annotation + This is required for publishing models on biosimulations.org + :type title: str + :param abstract: an abstract + :type abstract: str + :param annotation_style: type of annotation: either "miriam" or + "biosimulations" (default). + + There's a difference in the annotation "style" suggested by MIRIAM and + Biosimulations. MIRIAM suggests the use of RDF containers (bags) + wherewas Biosimulations does not. This argument allows the user to + select what style they want to use for the annotation. + :type annotation_style: str + :param serialization_format: format to serialize in using `rdflib.serialize` + See: https://rdflib.readthedocs.io/en/stable/plugin_serializers.html + :type serialization_format: str + :param xml_header: toggle inclusion of xml header if serializing in xml format + :type xml_header: bool + :param indent: number of spaces to use to indent the annotation block + :type indent: int + :param description: a longer description + :type description: str + :param keywords: keywords + :type keywords: list(str) + :param thumbnails: thumbnails + :type thumbnails: list(str) + :param organisms: of organisms + :type organisms: dict(str, str) + :param encodes_other_biology: other biological entities + :type encodes_other_biology: dict(str, str) + :param has_version: other versions + :type has_version: dict(str, str) + :param is_version_of: is a version of + :type is_version_of: dict(str, str) + :param has_part: includes another as a part + :type has_part: dict(str, str) + :param is_part_of: is a part of another entity + :type is_part_of: dict(str, str) + :param has_property: has a property + :type has_property: dict(str, str) + :param is_property_of: is a property of another entity + :type is_property_of: dict(str, str) + :param sources: links to sources (on GitHub and so on) + :type sources: dict(str, str) + :param is_instance_of: is an instance of + :type is_instance_of: dict(str, str) + :param has_instance: has instance of another entity + :type has_instance: dict(str, str) + :param predecessors: predecessors of this entity + :type predecessors: dict(str, str) + :param successors: successors of this entity + :type successors: dict(str, str) + :param see_also: more information + :type see_also: dict(str, str) + :param references: references + :type references: dict(str, str) + :param other_ids: other IDs + :type other_ids: dict(str, str) + :param citations: related citations + :type citations: dict(str, str) + :param authors: authors + This can either be: + + - a set: {"Author A", "Author B"} + - a dictionary where the keys are author names and values are + dictionaries of more metadata: + + {"Author A": {"https://../": "accountname", "..": ".."}} + + The inner dictionary should have the reference or literal as key, and + can take a "label", which can be any of the FOAF attributes: + + http://xmlns.com/foaf/spec/#sec-glance + + :type authors: dict(str, dict(str, str) or set + :param contributors: other contributors, follows the same format as authors + :type contributors: dict(str, dict(str, str) or set + :param license: license + :type license: dict(str, str) + :param funders: funders + :type funders: dict(str, str) + :param creation_date: date in YYYY-MM-DD format when this was created (eg: 2024-04-19) + :type creation_date: str + :param modified_dates: dates in YYYY-MM-DD format when modifications were made + :type modified_dates: list(str) + :param write_to_file: path to file to write to + :type write_to_file: str + :returns: the annotation string in the requested format. + + """ + # if subject is a file + if subject.endswith(".omex"): + fileprefix = f"http://omex-library.org/{subject}" + subjectobj = URIRef(fileprefix) + elif subject.endswith(".nml"): + fileprefix = "http://omex-library.org/ArchiveName.omex" + subjectobj = URIRef(f"{fileprefix}/{subject}") + else: + subjectobj = Literal(subject) + + # get the args passed to this function + mylocals = locals() + + self.doc.add((subjectobj, self.ARG_MAP["title"], Literal(title))) + + # loop over the rest + for arg, val in mylocals.items(): + if arg in self.ARG_MAP.keys(): + # handle any special cases + if arg == "abstract" or arg == "description": + self.doc.add((subjectobj, self.ARG_MAP[arg], Literal(val))) + + elif arg == "thumbnails": + prefixed = [ + f"{fileprefix}/{t}" if (not t.startswith("http")) else t + for t in val + ] + self._add_element( + subjectobj, prefixed, self.ARG_MAP[arg], annotation_style ) - for li in _find_elements(info, "li", rdf=True): - attr = _get_attr_in_element(li, "resource", rdf=True) - if attr: - annotations[desc].append({kind: attr}) - - logger.info("Annotations in %s: " % (nml2_file)) - pp.pprint(annotations) - - -def create_annotation( - subject, - title=None, - abstract=None, - annotation_style: typing.Literal["miriam", "biosimulations"] = "biosimulations", - serialization_format: str = "pretty-xml", - write_to_file: typing.Optional[str] = None, - xml_header: bool = True, - indent: int = 12, - description: typing.Optional[str] = None, - keywords: typing.Optional[typing.List[str]] = None, - thumbnails: typing.Optional[typing.List[str]] = None, - organisms: typing.Optional[typing.Dict[str, str]] = None, - encodes_other_biology: typing.Optional[typing.Dict[str, str]] = None, - has_version: typing.Optional[typing.Dict[str, str]] = None, - is_version_of: typing.Optional[typing.Dict[str, str]] = None, - has_part: typing.Optional[typing.Dict[str, str]] = None, - is_part_of: typing.Optional[typing.Dict[str, str]] = None, - has_property: typing.Optional[typing.Dict[str, str]] = None, - is_property_of: typing.Optional[typing.Dict[str, str]] = None, - sources: typing.Optional[typing.Dict[str, str]] = None, - is_instance_of: typing.Optional[typing.Dict[str, str]] = None, - has_instance: typing.Optional[typing.Dict[str, str]] = None, - predecessors: typing.Optional[typing.Dict[str, str]] = None, - successors: typing.Optional[typing.Dict[str, str]] = None, - see_also: typing.Optional[typing.Dict[str, str]] = None, - references: typing.Optional[typing.Dict[str, str]] = None, - other_ids: typing.Optional[typing.Dict[str, str]] = None, - citations: typing.Optional[typing.Dict[str, str]] = None, - authors: typing.Optional[ - typing.Union[typing.Dict[str, typing.Dict[str, str]], typing.Set] - ] = None, - contributors: typing.Optional[ - typing.Union[typing.Dict[str, typing.Dict[str, str]], typing.Set] - ] = None, - license: typing.Optional[typing.Dict[str, str]] = None, - funders: typing.Optional[typing.Dict[str, str]] = None, - creation_date: typing.Optional[str] = None, - modified_dates: typing.Optional[typing.List[str]] = None, -): - """Create an RDF annotation from the provided fields - - .. versionadded:: 1.2.10 - - This can be used to create an RDF annotation for a subject---a model or a - file like an OMEX archive file. It supports most qualifiers and will be - continuously updated to support more as they are added. - - It merely uses rdflib to make life easier for users to create annotations - by coding in the various predicates for each subject. - - For information on the specifications, see: - - - COMBINE specifications: https://github.com/combine-org/combine-specifications/blob/main/specifications/qualifiers-1.1.md - - Biosimulations guidelines: https://docs.biosimulations.org/concepts/conventions/simulation-project-metadata/ - - MIRIAM guidelines: https://drive.google.com/file/d/1JqjcH0T0UTWMuBj-scIMwsyt2z38A0vp/view - - - Note that: - - - not all qualifiers have been included yet - - the qualifiers and their representations may change in the future - - For any arguments here that take a dictionary of strings, the key is the - resource reference URI, and the value is the string label. For example: - - .. code-block:: python - - encodes_other_biology={ - "http://identifiers.org/GO:0009653": "anatomical structure morphogenesis", - "http://identifiers.org/kegg:ko04111": "Cell cycle - yeast", - } + elif arg == "license": + assert len(val.items()) == 1 + self._add_element( + subjectobj, val, self.ARG_MAP[arg], annotation_style + ) - :param subject: subject/target of the annotation - could be a file, a mode component - :type subject: str - :param title: title of annotation - This is required for publishing models on biosimulations.org - :type title: str - :param abstract: an abstract - :type abstract: str - :param annotation_style: type of annotation: either "miriam" or - "biosimulations" (default). - - There's a difference in the annotation "style" suggested by MIRIAM and - Biosimulations. MIRIAM suggests the use of RDF containers (bags) - wherewas Biosimulations does not. This argument allows the user to - select what style they want to use for the annotation. - :type annotation_style: str - :param serialization_format: format to serialize in using `rdflib.serialize` - See: https://rdflib.readthedocs.io/en/stable/plugin_serializers.html - :type serialization_format: str - :param xml_header: toggle inclusion of xml header if serializing in xml format - :type xml_header: bool - :param indent: number of spaces to use to indent the annotation block - :type indent: int - :param description: a longer description - :type description: str - :param keywords: keywords - :type keywords: list(str) - :param thumbnails: thumbnails - :type thumbnails: list(str) - :param organisms: of organisms - :type organisms: dict(str, str) - :param encodes_other_biology: other biological entities - :type encodes_other_biology: dict(str, str) - :param has_version: other versions - :type has_version: dict(str, str) - :param is_version_of: is a version of - :type is_version_of: dict(str, str) - :param has_part: includes another as a part - :type has_part: dict(str, str) - :param is_part_of: is a part of another entity - :type is_part_of: dict(str, str) - :param has_property: has a property - :type has_property: dict(str, str) - :param is_property_of: is a property of another entity - :type is_property_of: dict(str, str) - :param sources: links to sources (on GitHub and so on) - :type sources: dict(str, str) - :param is_instance_of: is an instance of - :type is_instance_of: dict(str, str) - :param has_instance: has instance of another entity - :type has_instance: dict(str, str) - :param predecessors: predecessors of this entity - :type predecessors: dict(str, str) - :param successors: successors of this entity - :type successors: dict(str, str) - :param see_also: more information - :type see_also: dict(str, str) - :param references: references - :type references: dict(str, str) - :param other_ids: other IDs - :type other_ids: dict(str, str) - :param citations: related citations - :type citations: dict(str, str) - :param authors: authors - This can either be: - - - a set: {"Author A", "Author B"} - - a dictionary: {"Author A": {"https://../": "accountname", "..": ".."}} - - All labels apart from "accountname" are ignored - :type authors: dict(str, dict(str, str) or set - :param contributors: other contributors, follws the same format as authors - :type contributors: dict(str, dict(str, str) or set - :param license: license - :type license: dict(str, str) - :param funders: funders - :type funders: dict(str, str) - :param creation_date: date in YYYY-MM-DD format when this was created (eg: 2024-04-19) - :type creation_date: str - :param modified_dates: dates in YYYY-MM-DD format when modifications were made - :type modified_dates: list(str) - :param write_to_file: path to file to write to - :type write_to_file: str - :returns: the annotation string in the requested format. + elif arg == "authors" or arg == "contributors": + self._add_humans( + subjectobj, val, self.ARG_MAP[arg], annotation_style + ) + elif arg == "creation_date": + ac = BNode() + self.doc.add((subjectobj, self.ARG_MAP[arg], ac)) + self.doc.add((ac, DCTERMS.W3CDTF, Literal(val))) + + elif arg == "modified_dates": + ac = BNode() + self.doc.add((subjectobj, self.ARG_MAP[arg], ac)) + if annotation_style == "biosimulations": + for d in val: + self.doc.add((ac, DCTERMS.W3CDTF, Literal(d))) + else: + another = BNode() + self.doc.add((ac, DCTERMS.W3CDTF, another)) + newbag = Bag(self.doc, another) + for d in val: + newbag.append(Literal(d)) + else: + self._add_element( + subjectobj, val, self.ARG_MAP[arg], annotation_style + ) - """ - doc = Graph() - - # namespaces not in rdflib - PRISM = Namespace(NAMESPACES_MAP["prism"]) - doc.bind("prism", PRISM) - COLLEX = Namespace(NAMESPACES_MAP["collex"]) - doc.bind("collex", COLLEX) - BQBIOL = Namespace(NAMESPACES_MAP["bqbiol"]) - doc.bind("bqbiol", BQBIOL) - BQMODEL = Namespace(NAMESPACES_MAP["bqmodel"]) - doc.bind("bqmodel", BQMODEL) - SCORO = Namespace(NAMESPACES_MAP["scoro"]) - doc.bind("scoro", SCORO) - - # if subject is a file - if subject.endswith(".omex"): - fileprefix = f"http://omex-library.org/{subject}" - subjectobj = URIRef(fileprefix) - elif subject.endswith(".nml"): - fileprefix = "http://omex-library.org/ArchiveName.omex" - subjectobj = URIRef(f"{fileprefix}/{subject}") - else: - subjectobj = Literal(subject) - - doc.add((subjectobj, DC.title, Literal(title))) - if abstract: - doc.add((subjectobj, DCTERMS.abstract, Literal(abstract))) - if description: - doc.add((subjectobj, DC.description, Literal(description))) - if keywords: - _add_element(doc, subjectobj, keywords, PRISM.keyword, annotation_style) - if thumbnails: - prefixed = [ - f"{fileprefix}/{t}" if (not t.startswith("http")) else t for t in thumbnails - ] - _add_element(doc, subjectobj, prefixed, COLLEX.thumbnail, annotation_style) - if organisms: - doc.bind("bqbiol:hasTaxon", BQBIOL + "/hasTaxon") - _add_element(doc, subjectobj, organisms, BQBIOL.hasTaxon, annotation_style) - if encodes_other_biology: - doc.bind("bqbiol:encodes", BQBIOL + "/encodes") - _add_element( - doc, subjectobj, encodes_other_biology, BQBIOL.encodes, annotation_style - ) - if has_version: - doc.bind("bqbiol:hasVersion", BQBIOL + "/hasVersion") - _add_element(doc, subjectobj, has_version, BQBIOL.hasVersion, annotation_style) - if is_version_of: - doc.bind("bqbiol:isVersionOf", BQBIOL + "/isVersionOf") - _add_element( - doc, subjectobj, is_version_of, BQBIOL.isVersionOf, annotation_style - ) - if has_part: - doc.bind("bqbiol:hasPart", BQBIOL + "/hasPart") - _add_element(doc, subjectobj, has_part, BQBIOL.hasPart, annotation_style) - if is_part_of: - doc.bind("bqbiol:isPartOf", BQBIOL + "/isPartOf") - _add_element(doc, subjectobj, is_part_of, BQBIOL.isPartOf, annotation_style) - if has_property: - doc.bind("bqbiol:hasProperty", BQBIOL + "/hasProperty") - _add_element( - doc, subjectobj, has_property, BQBIOL.hasProperty, annotation_style - ) - if is_property_of: - doc.bind("bqbiol:isPropertyOf", BQBIOL + "/isPropertyOf") - _add_element( - doc, subjectobj, is_property_of, BQBIOL.isPropertyOf, annotation_style - ) - if sources: - _add_element(doc, subjectobj, sources, DC.source, annotation_style) - if is_instance_of: - doc.bind("bqmodel:isInstanceOf", BQMODEL + "/isInstanceOf") - _add_element( - doc, subjectobj, is_instance_of, BQMODEL.isInstanceOf, annotation_style - ) - if has_instance: - doc.bind("bqmodel:hasInstance", BQMODEL + "/hasInstance") - _add_element( - doc, subjectobj, has_instance, BQMODEL.hasInstance, annotation_style - ) - if predecessors: - doc.bind("bqmodel:isDerivedFrom", BQMODEL + "/isDerivedFrom") - _add_element( - doc, subjectobj, predecessors, BQMODEL.isDerivedFrom, annotation_style - ) - if successors: - doc.bind("scoro:successor", SCORO + "/successor") - _add_element(doc, subjectobj, successors, SCORO.successor, annotation_style) - if see_also: - _add_element(doc, subjectobj, see_also, RDFS.seeAlso, annotation_style) - if references: - _add_element(doc, subjectobj, references, DCTERMS.references, annotation_style) - if other_ids: - doc.bind("bqmodel:is", BQMODEL + "/is") - _add_element(doc, subjectobj, other_ids, BQMODEL.IS, annotation_style) - if citations: - doc.bind("bqmodel:isDescribedBy", BQMODEL + "/isDescribedBy") - _add_element( - doc, subjectobj, citations, BQMODEL.isDescribedBy, annotation_style - ) - if authors: - _add_humans(doc, subjectobj, authors, DC.creator, annotation_style) - if contributors: - _add_humans(doc, subjectobj, contributors, DC.contributor, annotation_style) - if license: - assert len(license.items()) == 1 - _add_element(doc, subjectobj, license, DCTERMS.license, annotation_style) - if funders: - doc.bind("scoro:funder", SCORO + "/funder") - _add_element(doc, subjectobj, funders, SCORO.funder, annotation_style) - if creation_date: - ac = BNode() - doc.add((subjectobj, DCTERMS.created, ac)) - doc.add((ac, DCTERMS.W3CDTF, Literal(creation_date))) - if modified_dates: - ac = BNode() - doc.add((subjectobj, DCTERMS.modified, ac)) - for d in modified_dates: - doc.add((ac, DCTERMS.W3CDTF, Literal(d))) - - annotation = doc.serialize(format=serialization_format) - - # indent - if indent > 0: - annotation = textwrap.indent(annotation, " " * indent) - - # xml issues - if "xml" in serialization_format: - # replace rdf:_1 etc with rdf:li - # our LEMS definitions only know rdf:li - # https://github.com/RDFLib/rdflib/issues/1374#issuecomment-885656850 - rdfli_pattern = re.compile(r"\brdf:_\d+\b") - annotation = rdfli_pattern.sub("rdf:li", annotation) - - # remove nodeids for rdflib BNodes: these aren't required - rdfbnode_pattern = re.compile(r' rdf:nodeID="\S+"') - annotation = rdfbnode_pattern.sub("", annotation) - - # remove xml header, not used when embedding into other NeuroML files - if xml_header is False: - annotation = annotation[annotation.find(">") + 1 :] - - if write_to_file: - with open(write_to_file, "w") as f: - print(annotation, file=f) - - return annotation - - -def _add_element( - doc: Graph, - subjectobj: typing.Union[URIRef, Literal], - info_dict: typing.Union[typing.Iterable[str], typing.Dict[str, str]], - node_type: URIRef, - annotation_style: str, -): - """Add an new element to the RDF annotation - - :param doc: main rdf document object - :type doc: RDF.Graph - :param subjectobj: main object being referred to - :type subjectobj: URIRef or Literal - :param info_dict: dictionary of entries and their labels, or Iterable if no labels - :type info_dict: dict or Iterable - :param node_type: node type - :type node_type: URIRef - :param annotation_style: type of annotation - :type annotation_style: str - """ - if annotation_style not in ["biosimulations", "miriam"]: - raise ValueError("Annotation style must either be 'miriam' or 'biosimulations'") - - # if not a dict, try to create a dict with blank values - if not isinstance(info_dict, dict): - copy_dict = {} # type: typing.Dict[str, str] - for i in info_dict: - copy_dict[i] = "" - info_dict = copy_dict - - # for biosimulations, we do not use bags - if annotation_style == "biosimulations": - for idf, label in info_dict.items(): - # add a top level node + annotation = self.doc.serialize(format=serialization_format) + + # indent + if indent > 0: + annotation = textwrap.indent(annotation, " " * indent) + + # xml issues + if "xml" in serialization_format: + # replace rdf:_1 etc with rdf:li + # our LEMS definitions only know rdf:li + # https://github.com/RDFLib/rdflib/issues/1374#issuecomment-885656850 + rdfli_pattern = re.compile(r"\brdf:_\d+\b") + annotation = rdfli_pattern.sub("rdf:li", annotation) + + # remove nodeids for rdflib BNodes: these aren't required + rdfbnode_pattern = re.compile(r' rdf:nodeID="\S+"') + annotation = rdfbnode_pattern.sub("", annotation) + + # remove xml header, not used when embedding into other NeuroML files + if xml_header is False: + annotation = annotation[annotation.find(">") + 1 :] + + if write_to_file: + with open(write_to_file, "w") as f: + print(annotation, file=f) + + return annotation + + def _add_element( + self, + subjectobj: typing.Union[URIRef, Literal], + info_dict: typing.Union[typing.Iterable[str], typing.Dict[str, str]], + node_type: URIRef, + annotation_style: str, + ): + """Add an new element to the RDF annotation + + :param subjectobj: main object being referred to + :type subjectobj: URIRef or Literal + :param info_dict: dictionary of entries and their labels, or Iterable if no labels + :type info_dict: dict or Iterable + :param node_type: node type + :type node_type: URIRef + :param annotation_style: type of annotation + :type annotation_style: str + """ + if annotation_style not in ["biosimulations", "miriam"]: + raise ValueError( + "Annotation style must either be 'miriam' or 'biosimulations'" + ) + + # do nothing if an empty dict is passed + if info_dict is None: + return + + # if not a dict, try to create a dict with blank values + if not isinstance(info_dict, dict): + copy_dict = {} # type: typing.Dict[str, str] + for i in info_dict: + copy_dict[i] = "" + info_dict = copy_dict + + # for biosimulations, we do not use bags + if annotation_style == "biosimulations": + for idf, label in info_dict.items(): + # add a top level node + top_node = BNode() + self.doc.add((subjectobj, node_type, top_node)) + self.doc.add((top_node, DC.identifier, URIRef(idf))) + if len(label) > 0: + self.doc.add((top_node, RDFS.label, Literal(label))) + elif annotation_style == "miriam": + # even if there's only one entry, we still create a bag. + # this seems to be the norm in the SBML examples + # https://raw.githubusercontent.com/combine-org/combine-specifications/main/specifications/files/sbml.level-3.version-2.core.release-2.pdf top_node = BNode() - doc.add((subjectobj, node_type, top_node)) - doc.add((top_node, DC.identifier, URIRef(idf))) - if len(label) > 0: - doc.add((top_node, RDFS.label, Literal(label))) - elif annotation_style == "miriam": - # even if there's only one entry, we still create a bag. - # this seems to be the norm in the SBML examples - # https://raw.githubusercontent.com/combine-org/combine-specifications/main/specifications/files/sbml.level-3.version-2.core.release-2.pdf - top_node = BNode() - doc.add((subjectobj, node_type, top_node)) - bag = Bag(doc, top_node, []) - for idf, label in info_dict.items(): - bag.append(_URIRef_or_Literal(idf)) - - -def _add_humans( - doc: Graph, - subjectobj: typing.Union[URIRef, Literal], - info_dict: typing.Union[typing.Dict[str, typing.Dict[str, str]], typing.Set], - node_type: URIRef, - annotation_style: str, -): - """Add an new elements related to humans to the RDF annotation. - - This covers authors/contributors where the same person can have multiple - annotations related to them. - - :param doc: main rdf document object - :type doc: RDF.Graph - :param subjectobj: main object being referred to - :type subjectobj: URIRef or Literal - :param info_dict: dictionary of information - :type info_dict: dict - :param node_type: node type - :type node_type: URIRef - :param annotation_style: type of annotation - :type annotation_style: str - """ - if annotation_style not in ["biosimulations", "miriam"]: - raise ValueError("Annotation style must either be 'miriam' or 'biosimulations'") - - # if not a dict, create a dict with blank values - if not isinstance(info_dict, dict): - copy_dict = {} # type: typing.Dict[str, typing.Dict] - for i in info_dict: - copy_dict[i] = {} - info_dict = copy_dict - - if annotation_style == "biosimulations": - for name, info in info_dict.items(): + self.doc.add((subjectobj, node_type, top_node)) + bag = Bag(self.doc, top_node, []) + for idf, label in info_dict.items(): + bag.append(_URIRef_or_Literal(idf)) + + def _add_humans( + self, + subjectobj: typing.Union[URIRef, Literal], + info_dict: typing.Union[typing.Dict[str, typing.Dict[str, str]], typing.Set], + node_type: URIRef, + annotation_style: str, + ): + """Add an new elements related to humans to the RDF annotation. + + This covers authors/contributors where the same person can have multiple + annotations related to them. + + :param subjectobj: main object being referred to + :type subjectobj: URIRef or Literal + :param info_dict: dictionary of information + :type info_dict: dict + :param node_type: node type + :type node_type: URIRef + :param annotation_style: type of annotation + :type annotation_style: str + """ + if annotation_style not in ["biosimulations", "miriam"]: + raise ValueError( + "Annotation style must either be 'miriam' or 'biosimulations'" + ) + + # if not a dict, create a dict with blank values + if not isinstance(info_dict, dict): + copy_dict = {} # type: typing.Dict[str, typing.Dict] + for i in info_dict: + copy_dict[i] = {} + info_dict = copy_dict + + if annotation_style == "biosimulations": + for name, info in info_dict.items(): + top_node = BNode() + self.doc.add((subjectobj, node_type, top_node)) + + self.doc.add((top_node, FOAF.name, Literal(name))) + self.doc.add((top_node, RDFS.label, Literal(name))) + + # other fields + for idf, label in info.items(): + try: + foaf_type = getattr(FOAF, label) + except AttributeError: + logger.info("Not a FOAF attribute, using DC.identifier") + foaf_type = DC.identifier + self.doc.add((top_node, foaf_type, _URIRef_or_Literal(idf))) + elif annotation_style == "miriam": + # top level node: creator/contributor etc. top_node = BNode() - doc.add((subjectobj, node_type, top_node)) - - doc.add((top_node, FOAF.name, Literal(name))) - doc.add((top_node, RDFS.label, Literal(name))) - - # other fields - for idf, label in info.items(): - try: - foaf_type = getattr(FOAF, label) - except AttributeError: - logger.info("Not a FOAF attribute, using DC.identifier") - foaf_type = DC.identifier - doc.add((top_node, foaf_type, _URIRef_or_Literal(idf))) - elif annotation_style == "miriam": - # top level node: creator/contributor etc. - top_node = BNode() - doc.add((subjectobj, node_type, top_node)) - - for name, info in info_dict.items(): - # individual references in a list - ref = URIRef(f"#{name.replace(' ', '_')}") - bag = Bag(doc, top_node, []) - bag.append(ref) - - # individual nodes for details - doc.add((ref, FOAF.name, Literal(name))) - for idf, label in info.items(): - try: - foaf_type = getattr(FOAF, label) - except AttributeError: - logger.info("Not a FOAF attribute, using DC.identifier") - foaf_type = DC.identifier - doc.add((ref, foaf_type, _URIRef_or_Literal(idf))) + self.doc.add((subjectobj, node_type, top_node)) + + for name, info in info_dict.items(): + # individual references in a list + ref = URIRef(f"#{name.replace(' ', '_')}") + bag = Bag(self.doc, top_node, []) + bag.append(ref) + + # individual nodes for details + self.doc.add((ref, FOAF.name, Literal(name))) + for idf, label in info.items(): + try: + foaf_type = getattr(FOAF, label) + except AttributeError: + logger.info("Not a FOAF attribute, using DC.identifier") + foaf_type = DC.identifier + self.doc.add((ref, foaf_type, _URIRef_or_Literal(idf))) + + def extract_annotations(self, nml2_file: str) -> None: + """Extract and print annotations from a NeuroML 2 file. + + :param nml2_file: name of NeuroML2 file to parse + :type nml2_file: str + """ + pp = pprint.PrettyPrinter() + test_file = open(nml2_file) + root = etree.parse(test_file).getroot() + annotations = {} # type: dict + + for a in _find_elements(root, "annotation"): + for r in _find_elements(a, "RDF", rdf=True): + contents = etree.tostring(r, pretty_print=True).decode("utf-8") + logger.debug(contents) + self.doc.parse(data=contents, format="application/rdf+xml") + + # for s, p, o in g: + # print(f"{s}: {p}: {o}") + + # for r in _find_elements(a, "Description", rdf=True): + # desc = _get_attr_in_element(r, "about", rdf=True) + # annotations[desc] = [] + # + # annotations[desc] = g.serialize(format="turtle2") + # + # for info in r: + # if isinstance(info.tag, str): + # kind = info.tag.replace( + # "{http://biomodels.net/biology-qualifiers/}", "bqbiol:" + # ) + # kind = kind.replace( + # "{http://biomodels.net/model-qualifiers/}", "bqmodel:" + # ) + # + # for li in _find_elements(info, "li", rdf=True): + # attr = _get_attr_in_element(li, "resource", rdf=True) + # if attr: + # annotations[desc].append({kind: attr}) + + logger.info("Annotations in %s: " % (nml2_file)) + pp.pprint(annotations) def _URIRef_or_Literal(astr: str) -> typing.Union[URIRef, Literal]: @@ -528,3 +559,26 @@ def _URIRef_or_Literal(astr: str) -> typing.Union[URIRef, Literal]: return URIRef(astr) else: return Literal(astr) + + +def create_annotation(*args, **kwargs): + """Wrapper around the Annotations.create_annotation method. + + :param **kwargs: TODO + :returns: TODO + + """ + new_annotation = Annotation() + return new_annotation.create_annotation(*args, **kwargs) + + +def extract_annotations(nml2_file: str): + """Wrapper around the Annotations.extract_annotations method. + + :param *args: TODO + :param **kwargs: TODO + :returns: TODO + + """ + new_annotation = Annotation() + return new_annotation.extract_annotations(nml2_file) diff --git a/tests/test_annotations.py b/tests/test_annotations.py index 56779f25..cd2166a9 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -9,8 +9,10 @@ import logging -from pyneuroml.annotations import create_annotation +from pyneuroml.annotations import create_annotation, extract_annotations +from pyneuroml.io import write_neuroml2_file import neuroml +import copy from . import BaseTestCase @@ -21,45 +23,42 @@ class TestAnnotations(BaseTestCase): """Test annotations module""" + common = { + "subject": "model.nml", + "description": "A tests model", + "abstract": "lol, something nice", + "keywords": ["something", "and something"], + "thumbnails": ["lol.png"], + "xml_header": False, + "organisms": { + "http://identifiers.org/taxonomy/4896": "Schizosaccharomyces pombe" + }, + "encodes_other_biology": { + "http://identifiers.org/GO:0009653": "anatomical structure morphogenesis", + "http://identifiers.org/kegg:ko04111": "Cell cycle - yeast", + }, + "sources": {"https://github.com/lala": "GitHub"}, + "predecessors": {"http://omex-library.org/BioSim0001.omex/model.xml": "model"}, + "creation_date": "2024-04-18", + "modified_dates": ["2024-04-18", "2024-04-19"], + "authors": { + "John Doe": { + "https://someurl.com": "homepage", + "https://anotherurl": "github", + }, + "Jane Smith": {}, + }, + "contributors": {"Jane Doe", "John Smith", "Jane Smith"}, + "see_also": {"http://link.com": "a link"}, + "references": {"http://reference.com": "a reference"}, + "funders": {"http://afundingbody.org": "a funding body"}, + "license": {"CC0": "license"}, + } + def test_create_annotation(self): """Test create_annotations""" - common = { - "subject": "model.nml", - "description": "A tests model", - "abstract": "lol, something nice", - "keywords": ["something", "and something"], - "thumbnails": ["lol.png"], - "xml_header": False, - "organisms": { - "http://identifiers.org/taxonomy/4896": "Schizosaccharomyces pombe" - }, - "encodes_other_biology": { - "http://identifiers.org/GO:0009653": "anatomical structure morphogenesis", - "http://identifiers.org/kegg:ko04111": "Cell cycle - yeast", - }, - "sources": {"https://github.com/lala": "GitHub"}, - "predecessors": { - "http://omex-library.org/BioSim0001.omex/model.xml": "model" - }, - "creation_date": "2024-04-18", - "modified_dates": ["2024-04-18", "2024-04-19"], - "authors": { - "John Doe": { - "https://someurl.com": "homepage", - "https://anotherurl": "github", - }, - "Jane Smith": {}, - }, - "contributors": {"Jane Doe", "John Smith", "Jane Smith"}, - "see_also": {"http://link.com": "a link"}, - "references": {"http://reference.com": "a reference"}, - "funders": {"http://afundingbody.org": "a funding body"}, - "license": {"CC0": "license"}, - } - annotation = create_annotation( - **common, - annotation_style="miriam", - ) + common1 = copy.deepcopy(self.common) + annotation = create_annotation(**common1, annotation_style="miriam") self.assertIsNotNone(annotation) print(annotation) @@ -69,9 +68,19 @@ def test_create_annotation(self): self.assertIsNone(newdoc.validate(recursive=True)) # biosimulations + common2 = copy.deepcopy(self.common) + annotation2 = create_annotation(**common2, annotation_style="biosimulations") + self.assertIsNotNone(annotation2) + print(annotation2) + + def test_extract_annotations(self): + """Test the extract_annotations function.""" annotation = create_annotation( - **common, - annotation_style="biosimulations", + **self.common, + annotation_style="miriam", ) self.assertIsNotNone(annotation) - print(annotation) + newdoc = neuroml.NeuroMLDocument(id="test") + newdoc.annotation = neuroml.Annotation([annotation]) + write_neuroml2_file(newdoc, "TestAnnotation.xml") + extract_annotations("TestAnnotation.xml") From d6d0c985e281f5ea483691ad54d873c0599cf64e Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Thu, 2 May 2024 18:42:42 +0100 Subject: [PATCH 04/20] wip: extracting annotations --- pyneuroml/annotations.py | 45 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/pyneuroml/annotations.py b/pyneuroml/annotations.py index 72e3d82c..9288af12 100644 --- a/pyneuroml/annotations.py +++ b/pyneuroml/annotations.py @@ -20,7 +20,7 @@ logger.setLevel(logging.INFO) try: - from rdflib import BNode, Graph, Literal, Namespace, URIRef, Bag + from rdflib import BNode, Graph, Literal, Namespace, URIRef, Bag, Container from rdflib.namespace import DC, DCTERMS, FOAF, RDFS, RDF except ImportError: logger.warning("Please install optional dependencies to use annotation features:") @@ -520,8 +520,47 @@ def extract_annotations(self, nml2_file: str) -> None: logger.debug(contents) self.doc.parse(data=contents, format="application/rdf+xml") - # for s, p, o in g: - # print(f"{s}: {p}: {o}") + for desc, pred in self.ARG_MAP.items(): + annotations[desc] = [] + objs = self.doc.objects(predicate=pred) + for obj in objs: + print(f"Iterating: {desc}: {obj} ({type(obj)})") + if isinstance(obj, Literal): + annotations[desc] = str(obj) + if isinstance(obj, BNode): + for cobj in self.doc.objects(obj): + print(f"Iterating BNode: {desc}: {cobj} ({type(cobj)})") + if isinstance(cobj, URIRef): + # a bag, ignore + if str(cobj).endswith("ns#Bag"): + continue + + # check if it's a subject for other triples + # (authors/contributors) + gen = self.doc.predicate_objects(subject=cobj) + lenitems = sum(1 for _ in gen) + print(f"Len items is {lenitems}") + + # a "plain" URIRef + if lenitems == 0: + annotations[desc].append(str(cobj)) + + # local reference + if lenitems > 0: + gen = self.doc.predicate_objects(subject=cobj) + bits = [] + for pred, pobj in gen: + print( + f"Found: {desc}: {pred} {pobj} ({type(pobj)})" + ) + bits.append(str(pobj)) + annotations[desc].append(bits) + + elif isinstance(cobj, Literal): + annotations[desc].append(str(cobj)) + # another bnode: parse it again (recurse?) + else: + print(f"BNod else: {desc}: {cobj} ({type(cobj)})") # for r in _find_elements(a, "Description", rdf=True): # desc = _get_attr_in_element(r, "about", rdf=True) From 79071f5c5b0d613c1d726e33cc0cb9c8a2032112 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Fri, 3 May 2024 10:22:47 +0100 Subject: [PATCH 05/20] chore(xml-utils): document functions --- pyneuroml/utils/xml.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/pyneuroml/utils/xml.py b/pyneuroml/utils/xml.py index db98a2f7..805b4e9c 100644 --- a/pyneuroml/utils/xml.py +++ b/pyneuroml/utils/xml.py @@ -7,15 +7,40 @@ Copyright 2024 NeuroML contributors """ +import typing +from lxml import etree -def _find_elements(el, name, rdf=False): + +def _find_elements(el: etree.Element, name: str, rdf: bool = False) -> typing.Iterator: + """Find elements with name in an XML string with root el + + :param el: root element + :type el: etree.Element + :param name: name of element to find + :type name: str + :param rdf: toggle whether elements are in an RDF namespace + :type rdf: bool + :returns: iterator over elements with name + """ ns = "http://www.neuroml.org/schema/neuroml2" if rdf: ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" return el.findall(".//{%s}%s" % (ns, name)) -def _get_attr_in_element(el, name, rdf=False): +def _get_attr_in_element( + el: etree.Element, name: str, rdf: bool = False +) -> typing.Optional[str]: + """Get value of an attribute name in element el + + :param el: element + :type el: etree.Element + :param name: attribute name + :type name: str + :param rdf: toggle whether elements are in an RDF namespace + :type rdf: bool + :returns: value of attribute or None of no attribute is found + """ ns = "http://www.neuroml.org/schema/neuroml2" if rdf: ns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" From e07169b0119031eadba89b04c418c53146636fdd Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Fri, 3 May 2024 11:29:42 +0100 Subject: [PATCH 06/20] feat(annotation): tweak valid URIRef prefixes --- pyneuroml/annotations.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pyneuroml/annotations.py b/pyneuroml/annotations.py index 9288af12..5caef35a 100644 --- a/pyneuroml/annotations.py +++ b/pyneuroml/annotations.py @@ -589,15 +589,20 @@ def extract_annotations(self, nml2_file: str) -> None: def _URIRef_or_Literal(astr: str) -> typing.Union[URIRef, Literal]: """Create a URIRef or Literal depending on string. + If a string begins with http:, https:, or file:, it is assumed to be a + URIRef. + :param astr: a string to create URIRef or Literal for :type astr: str :returns: a URIRef or Literal """ - if astr.startswith("http:"): - return URIRef(astr) - else: - return Literal(astr) + prefixes = ["http:", "https:", "file:"] + for p in prefixes: + if astr.startswith(p): + return URIRef(astr) + + return Literal(astr) def create_annotation(*args, **kwargs): From 3297d2cc417bb14e170656feaa97c60e347fd532 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Fri, 3 May 2024 11:57:11 +0100 Subject: [PATCH 07/20] fix(annotations): correct creation keywords --- pyneuroml/annotations.py | 60 +++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 25 deletions(-) diff --git a/pyneuroml/annotations.py b/pyneuroml/annotations.py index 5caef35a..80b17878 100644 --- a/pyneuroml/annotations.py +++ b/pyneuroml/annotations.py @@ -95,8 +95,8 @@ def __init__(self): "title": DC.title, "abstract": DCTERMS.abstract, "description": DC.description, - "keyword": PRISM.keyword, - "thumbnail": COLLEX.thumbnail, + "keywords": PRISM.keyword, + "thumbnails": COLLEX.thumbnail, "organisms": BQBIOL.hasTaxon, "encodes_other_biology": BQBIOL.encodes, "has_version": BQBIOL.hasVersion, @@ -308,10 +308,7 @@ def create_annotation( for arg, val in mylocals.items(): if arg in self.ARG_MAP.keys(): # handle any special cases - if arg == "abstract" or arg == "description": - self.doc.add((subjectobj, self.ARG_MAP[arg], Literal(val))) - - elif arg == "thumbnails": + if arg == "thumbnails": prefixed = [ f"{fileprefix}/{t}" if (not t.startswith("http")) else t for t in val @@ -403,35 +400,48 @@ def _add_element( "Annotation style must either be 'miriam' or 'biosimulations'" ) + logger.debug(f"Processing element {node_type}: {info_dict} ({type(info_dict)})") + # do nothing if an empty dict is passed if info_dict is None: return - # if not a dict, try to create a dict with blank values - if not isinstance(info_dict, dict): - copy_dict = {} # type: typing.Dict[str, str] - for i in info_dict: - copy_dict[i] = "" - info_dict = copy_dict - # for biosimulations, we do not use bags if annotation_style == "biosimulations": - for idf, label in info_dict.items(): - # add a top level node - top_node = BNode() - self.doc.add((subjectobj, node_type, top_node)) - self.doc.add((top_node, DC.identifier, URIRef(idf))) - if len(label) > 0: - self.doc.add((top_node, RDFS.label, Literal(label))) + if isinstance(info_dict, dict): + for idf, label in info_dict.items(): + # add a top level node + top_node = BNode() + self.doc.add((subjectobj, node_type, top_node)) + self.doc.add((top_node, DC.identifier, URIRef(idf))) + if len(label) > 0: + self.doc.add((top_node, RDFS.label, Literal(label))) + elif isinstance(info_dict, list): + for it in info_dict: + self.doc.add((subjectobj, node_type, _URIRef_or_Literal(it))) + elif isinstance(info_dict, str): + self.doc.add((subjectobj, node_type, _URIRef_or_Literal(info_dict))) + else: + raise ValueError(f"Could not parse: {node_type}: {info_dict}") + elif annotation_style == "miriam": # even if there's only one entry, we still create a bag. # this seems to be the norm in the SBML examples # https://raw.githubusercontent.com/combine-org/combine-specifications/main/specifications/files/sbml.level-3.version-2.core.release-2.pdf - top_node = BNode() - self.doc.add((subjectobj, node_type, top_node)) - bag = Bag(self.doc, top_node, []) - for idf, label in info_dict.items(): - bag.append(_URIRef_or_Literal(idf)) + if isinstance(info_dict, str): + self.doc.add((subjectobj, node_type, _URIRef_or_Literal(info_dict))) + else: + top_node = BNode() + self.doc.add((subjectobj, node_type, top_node)) + bag = Bag(self.doc, top_node, []) + if isinstance(info_dict, list): + for idf in info_dict: + bag.append(_URIRef_or_Literal(idf)) + elif isinstance(info_dict, dict): + for idf, label in info_dict.items(): + bag.append(_URIRef_or_Literal(idf)) + else: + raise ValueError(f"Could not parse: {node_type}: {info_dict}") def _add_humans( self, From e453d733d0a77ea5b034a53ffe0e79cf22783e65 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Fri, 3 May 2024 11:57:43 +0100 Subject: [PATCH 08/20] test(annotations): split creation tests --- tests/test_annotations.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_annotations.py b/tests/test_annotations.py index cd2166a9..0a2b23c3 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -52,11 +52,11 @@ class TestAnnotations(BaseTestCase): "see_also": {"http://link.com": "a link"}, "references": {"http://reference.com": "a reference"}, "funders": {"http://afundingbody.org": "a funding body"}, - "license": {"CC0": "license"}, + "license": {"https://identifiers.org/spdx:CC0": "CC0"}, } - def test_create_annotation(self): - """Test create_annotations""" + def test_create_annotation_miriam(self): + """Test create_annotations: MIRIAM""" common1 = copy.deepcopy(self.common) annotation = create_annotation(**common1, annotation_style="miriam") self.assertIsNotNone(annotation) @@ -67,7 +67,8 @@ def test_create_annotation(self): self.assertIsNone(newdoc.annotation.validate()) self.assertIsNone(newdoc.validate(recursive=True)) - # biosimulations + def test_create_annotation_biosimulations(self): + """Test create_annotations: biosimulations""" common2 = copy.deepcopy(self.common) annotation2 = create_annotation(**common2, annotation_style="biosimulations") self.assertIsNotNone(annotation2) From 02ba6a1d6a58e367aed815294b789b6a659b88e3 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Fri, 3 May 2024 16:17:30 +0100 Subject: [PATCH 09/20] feat(annotations): improve testing of both miriam and biosimulation styles --- tests/test_annotations.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/tests/test_annotations.py b/tests/test_annotations.py index 0a2b23c3..5eacde34 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -74,8 +74,9 @@ def test_create_annotation_biosimulations(self): self.assertIsNotNone(annotation2) print(annotation2) - def test_extract_annotations(self): + def test_extract_annotations_miriam(self): """Test the extract_annotations function.""" + fname = "TestAnnotationMiriam.xml" annotation = create_annotation( **self.common, annotation_style="miriam", @@ -83,5 +84,27 @@ def test_extract_annotations(self): self.assertIsNotNone(annotation) newdoc = neuroml.NeuroMLDocument(id="test") newdoc.annotation = neuroml.Annotation([annotation]) - write_neuroml2_file(newdoc, "TestAnnotation.xml") - extract_annotations("TestAnnotation.xml") + write_neuroml2_file(newdoc, fname) + extracted = extract_annotations(fname) + for key, val in extracted["test"].items(): + if val is not None and len(val) != 0: + print(f"{key}: {val} vs {self.common[key]}") + self.assertEqual(len(val), len(self.common[key])) + + def test_extract_annotations_biosimulations(self): + """Test the extract_annotations function.""" + fname = "TestAnnotationBiosimulations.xml" + annotation = create_annotation( + **self.common, + annotation_style="biosimulations", + ) + self.assertIsNotNone(annotation) + + newdoc = neuroml.NeuroMLDocument(id="test") + newdoc.annotation = neuroml.Annotation([annotation]) + write_neuroml2_file(newdoc, fname) + extracted = extract_annotations(fname) + for key, val in extracted["test"].items(): + if val is not None and len(val) != 0: + print(f"{key}: {val} vs {self.common[key]}") + self.assertEqual(len(val), len(self.common[key])) From 77d443f7f2957847f8c433262336d70a0b453349 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Fri, 3 May 2024 16:50:47 +0100 Subject: [PATCH 10/20] feat(annotations): update extraction code --- pyneuroml/annotations.py | 271 ++++++++++++++++++++++++++++++++------- 1 file changed, 223 insertions(+), 48 deletions(-) diff --git a/pyneuroml/annotations.py b/pyneuroml/annotations.py index 80b17878..66c1f6b0 100644 --- a/pyneuroml/annotations.py +++ b/pyneuroml/annotations.py @@ -20,8 +20,8 @@ logger.setLevel(logging.INFO) try: - from rdflib import BNode, Graph, Literal, Namespace, URIRef, Bag, Container - from rdflib.namespace import DC, DCTERMS, FOAF, RDFS, RDF + from rdflib import BNode, Graph, Literal, Namespace, URIRef, Bag + from rdflib.namespace import DC, DCTERMS, FOAF, RDFS except ImportError: logger.warning("Please install optional dependencies to use annotation features:") logger.warning("pip install pyneuroml[annotations]") @@ -302,8 +302,6 @@ def create_annotation( # get the args passed to this function mylocals = locals() - self.doc.add((subjectobj, self.ARG_MAP["title"], Literal(title))) - # loop over the rest for arg, val in mylocals.items(): if arg in self.ARG_MAP.keys(): @@ -513,87 +511,264 @@ def _add_humans( foaf_type = DC.identifier self.doc.add((ref, foaf_type, _URIRef_or_Literal(idf))) - def extract_annotations(self, nml2_file: str) -> None: + def extract_annotations( + self, nml2_file: str + ) -> typing.Dict[str, typing.Dict[typing.Any, typing.Any]]: """Extract and print annotations from a NeuroML 2 file. :param nml2_file: name of NeuroML2 file to parse :type nml2_file: str + :returns: dictionaries with annotations information + :rtype: dict + """ + pp = pprint.PrettyPrinter(width=100) + annotations = {} # type: typing.Dict + + with open(nml2_file, "r") as f: + root = etree.parse(f).getroot() + + for a in _find_elements(root, "annotation"): + parent = a.getparent() + logger.debug(f"Parent is {parent}") + # TODO: debug + # _get_attr_in_element doesn't quite work correctly + try: + obj_id = parent.attrib["id"] + except KeyError: + obj_id = "" + annotations[obj_id] = self.parse_rdf(a) + + logger.info("Annotations in %s: " % (nml2_file)) + pp.pprint(annotations) + return annotations + + def extract_annotations_from_string( + self, xml_string: str + ) -> typing.Dict[str, typing.Dict[typing.Any, typing.Any]]: + """Extract and print annotations from a NeuroML 2 string. + + :param xml_string: XML string to parse + :type xml_string: str + :returns: list of dictionaries with annotations information + :rtype: list of dict """ pp = pprint.PrettyPrinter() - test_file = open(nml2_file) - root = etree.parse(test_file).getroot() - annotations = {} # type: dict + annotations = {} # type: typing.Dict + root = etree.fromstring(xml_string).getroot() for a in _find_elements(root, "annotation"): - for r in _find_elements(a, "RDF", rdf=True): + parent = a.getparent() + logger.debug(f"Parent is {parent}") + try: + obj_id = parent.attrib["id"] + except KeyError: + obj_id = "" + annotations[obj_id] = self.parse_rdf(a) + + logger.info("Annotations:") + pp.pprint(annotations) + return annotations + + def parse_rdf( + self, annotation_element: etree.Element + ) -> typing.Dict[str, typing.Any]: + """Parse RDF from an element. + + Note that this is not a general purpose RDF parser. + It is a specific parser for the RDF annotations that are used in + NeuroML (which the py:func:`create_annotation` method can write). + + :param annotation_element: an element + :type annotation_element: etree.Element + :returns: annotation dictionary + + """ + annotations = {} # type: typing.Dict + # guess if it's biosimulations format or MIRIAM + # biosimulations does not use bags + bags = _find_elements(annotation_element, "Bag", rdf=True) + + if len(list(bags)) > 0: + logger.debug("Parsing MIRIAM style annotation") + # MIRIAM annotations, as written by create_annotation can be of a + # few forms: + + # 1. elements without any nesting, that contain literals as the + # objects (the root node is the subject, ex: title, description, + # abstract) + + # 2. elements that may contain multiple objects and are so contained + # in containers (bags) + # for these, the top level node is a "blank node" that includes the + # bag, which includes the items. + # note that the objects here may refer to other local subjects, + # eg: creators/contributors will contain a list of local reference + # to other elements + + # 3. elements that may contain multiple objects contained in another + # element (modification date) + # for these, there are two levels of blank nodes before the + # bags/items. + for r in _find_elements(annotation_element, "RDF", rdf=True): contents = etree.tostring(r, pretty_print=True).decode("utf-8") logger.debug(contents) self.doc.parse(data=contents, format="application/rdf+xml") for desc, pred in self.ARG_MAP.items(): annotations[desc] = [] + # get all objects that use the predicate and iterate over + # them objs = self.doc.objects(predicate=pred) for obj in objs: - print(f"Iterating: {desc}: {obj} ({type(obj)})") + logger.debug( + f"Iterating {pred} objects: {desc}: {obj} ({type(obj)})" + ) + # Literals: description, title, abstract if isinstance(obj, Literal): annotations[desc] = str(obj) - if isinstance(obj, BNode): + # nested elements: ones with blank nodes that contain + # bags with lists; the lists can contain local + # references to other elements + + elif isinstance(obj, BNode): + # the objects for the top level BNode subject will + # be the bags, and the items in the bags, all + # returned as a list for cobj in self.doc.objects(obj): - print(f"Iterating BNode: {desc}: {cobj} ({type(cobj)})") + logger.debug( + f"Iterating BNode objects: {desc}: {cobj} ({type(cobj)})" + ) if isinstance(cobj, URIRef): # a bag, ignore + bagged = False if str(cobj).endswith("ns#Bag"): + bagged = True + logger.debug("Ignoring Bag") continue - # check if it's a subject for other triples + # the list item can be a local reference to + # another triple (authors/contributors) + # so, check if it's a subject for other triples # (authors/contributors) gen = self.doc.predicate_objects(subject=cobj) lenitems = sum(1 for _ in gen) - print(f"Len items is {lenitems}") + logger.debug(f"Len items is {lenitems}") - # a "plain" URIRef + # no: it's a "plain" URIRef if lenitems == 0: annotations[desc].append(str(cobj)) - # local reference + # yes, it's a local reference if lenitems > 0: gen = self.doc.predicate_objects(subject=cobj) bits = [] for pred, pobj in gen: - print( + logger.debug( f"Found: {desc}: {pred} {pobj} ({type(pobj)})" ) bits.append(str(pobj)) - annotations[desc].append(bits) + if len(bits) == 1: + annotations[desc].append(bits[0]) + else: + annotations[desc].append(bits) + # a literal, eg: creation date elif isinstance(cobj, Literal): - annotations[desc].append(str(cobj)) - # another bnode: parse it again (recurse?) + if bagged: + annotations[desc].append(str(cobj)) + else: + annotations[desc] = str(cobj) + # another bnode: eg: modification date else: - print(f"BNod else: {desc}: {cobj} ({type(cobj)})") - - # for r in _find_elements(a, "Description", rdf=True): - # desc = _get_attr_in_element(r, "about", rdf=True) - # annotations[desc] = [] - # - # annotations[desc] = g.serialize(format="turtle2") - # - # for info in r: - # if isinstance(info.tag, str): - # kind = info.tag.replace( - # "{http://biomodels.net/biology-qualifiers/}", "bqbiol:" - # ) - # kind = kind.replace( - # "{http://biomodels.net/model-qualifiers/}", "bqmodel:" - # ) - # - # for li in _find_elements(info, "li", rdf=True): - # attr = _get_attr_in_element(li, "resource", rdf=True) - # if attr: - # annotations[desc].append({kind: attr}) - - logger.info("Annotations in %s: " % (nml2_file)) - pp.pprint(annotations) + logger.debug( + f"BNode nested in BNode: {desc}: {cobj} ({type(cobj)})" + ) + for ccobj in self.doc.objects(cobj): + logger.debug( + f"Iterating nested BNode: {desc}: {ccobj} ({type(ccobj)})" + ) + if str(ccobj).endswith("ns#Bag"): + logger.debug("Ignoring Bag") + continue + # a literal, eg: creation date + elif isinstance(ccobj, Literal): + annotations[desc].append(str(ccobj)) + + # biosimulations has a flat structure, since no containers (bags) are + # used. + else: + logger.debug("Parsing biosimulations style annotation") + for r in _find_elements(annotation_element, "RDF", rdf=True): + contents = etree.tostring(r, pretty_print=True).decode("utf-8") + logger.debug(contents) + self.doc.parse(data=contents, format="application/rdf+xml") + + for desc, pred in self.ARG_MAP.items(): + # Since containers are not used, we cannot tell if there + # are multiple objects of the same predicate + # Even if we count them, a multi-element object could just + # have one element, which will be equivalent to an object + # that should not have multiple elements + # So, we initialize manually + if desc in ["title", "abstract", "description", "creation_date"]: + annotations[desc] = None + # Everything else is an iterable + # Most are dicts, others are lists + else: + annotations[desc] = {} + + for obj in self.doc.objects(predicate=pred): + logger.debug(f"Iterating: {desc}: {obj} ({type(obj)})") + if isinstance(obj, BNode): + idfs = [] + labels = [] + for pred_, cobj in self.doc.predicate_objects(obj): + logger.debug( + f"Iterating BNode objects: {desc}: {pred_}: {cobj} ({type(cobj)})" + ) + if pred_ == RDFS.label: + labels.append(str(cobj)) + else: + idfs.append(str(cobj)) + + logger.debug(f"idfs: {idfs}") + logger.debug(f"labels: {labels}") + # id/label pairs + if len(idfs) == 1 and len(labels) == 1: + # using dict(zip..) splits the space separated + # strings into different labels + biosim_bits = dict(zip(idfs, labels)) + # predicate with single entry + if annotations[desc] is None: + annotations[desc] = biosim_bits + else: + annotations[desc].update(biosim_bits) + # label with multiple idfs + if len(idfs) > 1 and len(labels) == 1: + annotations[desc].update({labels[0]: set(idfs)}) + # no label, nested data + if len(idfs) == 1 and len(labels) == 0: + if annotations[desc] is None: + annotations[desc] = idfs[0] + else: + annotations[desc].append(idfs) + if len(idfs) > 1 and len(labels) == 0: + if len(annotations[desc]) == 0: + annotations[desc] = idfs + else: + annotations[desc].extend(idfs) + # not bnodes, top level objects + else: + # text objects (literals) + if annotations[desc] is None: + annotations[desc] = str(obj) + # objects that may be lists: keywords, thumbnails + else: + if len(annotations[desc]) == 0: + annotations[desc] = [str(obj)] + else: + annotations[desc].append(str(obj)) + return annotations def _URIRef_or_Literal(astr: str) -> typing.Union[URIRef, Literal]: @@ -628,10 +803,10 @@ def create_annotation(*args, **kwargs): def extract_annotations(nml2_file: str): """Wrapper around the Annotations.extract_annotations method. - - :param *args: TODO - :param **kwargs: TODO - :returns: TODO + :param nml2_file: name of NeuroML2 file to parse + :type nml2_file: str + :returns: dictionaries with annotations information + :rtype: dict """ new_annotation = Annotation() From 3426c6bee2aa736c30dc119d657672176c84dd4d Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Fri, 3 May 2024 16:52:58 +0100 Subject: [PATCH 11/20] feat(annotations): remove temporary test file --- pyneuroml/annotations.py | 7 ++++--- tests/test_annotations.py | 8 ++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pyneuroml/annotations.py b/pyneuroml/annotations.py index 66c1f6b0..1309250e 100644 --- a/pyneuroml/annotations.py +++ b/pyneuroml/annotations.py @@ -7,20 +7,21 @@ Copyright 2024 NeuroML contributors """ -import re import logging import pprint -import typing +import re import textwrap +import typing from lxml import etree + from pyneuroml.utils.xml import _find_elements logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) try: - from rdflib import BNode, Graph, Literal, Namespace, URIRef, Bag + from rdflib import Bag, BNode, Graph, Literal, Namespace, URIRef from rdflib.namespace import DC, DCTERMS, FOAF, RDFS except ImportError: logger.warning("Please install optional dependencies to use annotation features:") diff --git a/tests/test_annotations.py b/tests/test_annotations.py index 5eacde34..81ff7391 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -7,12 +7,13 @@ Copyright 2024 NeuroML contributors """ +import copy import logging +import os +import neuroml from pyneuroml.annotations import create_annotation, extract_annotations from pyneuroml.io import write_neuroml2_file -import neuroml -import copy from . import BaseTestCase @@ -90,6 +91,7 @@ def test_extract_annotations_miriam(self): if val is not None and len(val) != 0: print(f"{key}: {val} vs {self.common[key]}") self.assertEqual(len(val), len(self.common[key])) + os.unlink(fname) def test_extract_annotations_biosimulations(self): """Test the extract_annotations function.""" @@ -108,3 +110,5 @@ def test_extract_annotations_biosimulations(self): if val is not None and len(val) != 0: print(f"{key}: {val} vs {self.common[key]}") self.assertEqual(len(val), len(self.common[key])) + + os.unlink(fname) From cc6f220d2f1bf48ba9d900ab76d6a719e03479c9 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Fri, 3 May 2024 16:54:27 +0100 Subject: [PATCH 12/20] chore(pre-commit): include ruff linter for isort Import sorting requires the linter: https://docs.astral.sh/ruff/formatter/#sorting-imports --- .pre-commit-config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cae9c9e9..298d8ad2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,4 +10,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.4.1 hooks: + - id: ruff + args: [ -I --fix ] - id: ruff-format From 07a1f4730e348461d0d5fe46e9eb4f35cd8ff64c Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Fri, 3 May 2024 17:07:46 +0100 Subject: [PATCH 13/20] chore(pre-commit): fix linter args --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 298d8ad2..5f83cf7e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,5 +11,5 @@ repos: rev: v0.4.1 hooks: - id: ruff - args: [ -I --fix ] + args: [ "--select", "I", "--fix" ] - id: ruff-format From 0debe5b81379751d0c594a57e30370cb86ab6914 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Fri, 3 May 2024 17:08:31 +0100 Subject: [PATCH 14/20] feat(annotation): ignore empty arguments while creating --- pyneuroml/annotations.py | 6 ++++++ tests/test_biosimulations.py | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pyneuroml/annotations.py b/pyneuroml/annotations.py index 1309250e..bb6f07ab 100644 --- a/pyneuroml/annotations.py +++ b/pyneuroml/annotations.py @@ -306,6 +306,8 @@ def create_annotation( # loop over the rest for arg, val in mylocals.items(): if arg in self.ARG_MAP.keys(): + if val is None: + continue # handle any special cases if arg == "thumbnails": prefixed = [ @@ -468,6 +470,10 @@ def _add_humans( "Annotation style must either be 'miriam' or 'biosimulations'" ) + # do nothing if an empty dict is passed + if info_dict is None: + return + # if not a dict, create a dict with blank values if not isinstance(info_dict, dict): copy_dict = {} # type: typing.Dict[str, typing.Dict] diff --git a/tests/test_biosimulations.py b/tests/test_biosimulations.py index 8443afc1..d7644254 100644 --- a/tests/test_biosimulations.py +++ b/tests/test_biosimulations.py @@ -7,7 +7,6 @@ Copyright 2024 NeuroML contributors """ - import logging import os import pathlib From 736f4f85c7765c031c1ab92557610ea0489fd594 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Fri, 3 May 2024 21:14:14 +0100 Subject: [PATCH 15/20] feat(annotations): handle license specially Because, with MIRIAM, the license is provided as a dict, but encoded as a string or a ref. To decode the RDF, it needs to be converted back to a set from the string. --- pyneuroml/annotations.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/pyneuroml/annotations.py b/pyneuroml/annotations.py index bb6f07ab..3ce9a69d 100644 --- a/pyneuroml/annotations.py +++ b/pyneuroml/annotations.py @@ -320,8 +320,12 @@ def create_annotation( elif arg == "license": assert len(val.items()) == 1 + if annotation_style == "miriam": + l_string = list(val.keys())[0] + else: + l_string = val self._add_element( - subjectobj, val, self.ARG_MAP[arg], annotation_style + subjectobj, l_string, self.ARG_MAP[arg], annotation_style ) elif arg == "authors" or arg == "contributors": @@ -630,9 +634,17 @@ def parse_rdf( logger.debug( f"Iterating {pred} objects: {desc}: {obj} ({type(obj)})" ) - # Literals: description, title, abstract - if isinstance(obj, Literal): + + # handle specially to return in the form supplied to + # create_annotation + if desc == "license": + annotations[desc] = {str(obj)} + continue + + # top level Literals or URIRefs: description, title, abstract + if isinstance(obj, Literal) or isinstance(obj, URIRef): annotations[desc] = str(obj) + # nested elements: ones with blank nodes that contain # bags with lists; the lists can contain local # references to other elements @@ -701,6 +713,9 @@ def parse_rdf( elif isinstance(ccobj, Literal): annotations[desc].append(str(ccobj)) + else: + raise ValueError(f"Unrecognised element type: {obj}") + # biosimulations has a flat structure, since no containers (bags) are # used. else: From 8cb599cb5f95acb9908ba40a2bb8b890bd4d2972 Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Fri, 3 May 2024 21:17:19 +0100 Subject: [PATCH 16/20] test(annotation): improve checks --- tests/test_annotations.py | 43 +++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/tests/test_annotations.py b/tests/test_annotations.py index 81ff7391..de78d569 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -12,6 +12,7 @@ import os import neuroml + from pyneuroml.annotations import create_annotation, extract_annotations from pyneuroml.io import write_neuroml2_file @@ -24,13 +25,11 @@ class TestAnnotations(BaseTestCase): """Test annotations module""" - common = { - "subject": "model.nml", + annotation_args = { "description": "A tests model", "abstract": "lol, something nice", "keywords": ["something", "and something"], "thumbnails": ["lol.png"], - "xml_header": False, "organisms": { "http://identifiers.org/taxonomy/4896": "Schizosaccharomyces pombe" }, @@ -55,11 +54,16 @@ class TestAnnotations(BaseTestCase): "funders": {"http://afundingbody.org": "a funding body"}, "license": {"https://identifiers.org/spdx:CC0": "CC0"}, } + func_args = { + "xml_header": False, + "subject": "model.nml", + } def test_create_annotation_miriam(self): """Test create_annotations: MIRIAM""" - common1 = copy.deepcopy(self.common) - annotation = create_annotation(**common1, annotation_style="miriam") + annotation = create_annotation( + **self.annotation_args, **self.func_args, annotation_style="miriam" + ) self.assertIsNotNone(annotation) print(annotation) @@ -70,8 +74,9 @@ def test_create_annotation_miriam(self): def test_create_annotation_biosimulations(self): """Test create_annotations: biosimulations""" - common2 = copy.deepcopy(self.common) - annotation2 = create_annotation(**common2, annotation_style="biosimulations") + annotation2 = create_annotation( + **self.annotation_args, **self.func_args, annotation_style="biosimulations" + ) self.assertIsNotNone(annotation2) print(annotation2) @@ -79,7 +84,8 @@ def test_extract_annotations_miriam(self): """Test the extract_annotations function.""" fname = "TestAnnotationMiriam.xml" annotation = create_annotation( - **self.common, + **self.annotation_args, + **self.func_args, annotation_style="miriam", ) self.assertIsNotNone(annotation) @@ -87,17 +93,21 @@ def test_extract_annotations_miriam(self): newdoc.annotation = neuroml.Annotation([annotation]) write_neuroml2_file(newdoc, fname) extracted = extract_annotations(fname) - for key, val in extracted["test"].items(): - if val is not None and len(val) != 0: - print(f"{key}: {val} vs {self.common[key]}") - self.assertEqual(len(val), len(self.common[key])) + for key, val in self.annotation_args.items(): + print(f"{key}: {val} vs {extracted['test'][key]}") + # miriam only has keys + if isinstance(val, dict): + self.assertEqual(len(val), len(extracted["test"][key])) + elif isinstance(val, str): + self.assertEqual(val, extracted["test"][key]) os.unlink(fname) def test_extract_annotations_biosimulations(self): """Test the extract_annotations function.""" fname = "TestAnnotationBiosimulations.xml" annotation = create_annotation( - **self.common, + **self.annotation_args, + **self.func_args, annotation_style="biosimulations", ) self.assertIsNotNone(annotation) @@ -106,9 +116,8 @@ def test_extract_annotations_biosimulations(self): newdoc.annotation = neuroml.Annotation([annotation]) write_neuroml2_file(newdoc, fname) extracted = extract_annotations(fname) - for key, val in extracted["test"].items(): - if val is not None and len(val) != 0: - print(f"{key}: {val} vs {self.common[key]}") - self.assertEqual(len(val), len(self.common[key])) + for key, val in self.annotation_args.items(): + print(f"{key}: {val} vs {extracted['test'][key]}") + self.assertEqual(len(val), len(extracted["test"][key])) os.unlink(fname) From 909a384db52eb2b6cc8a0e1be488957ea9d0777d Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Fri, 3 May 2024 23:33:08 +0100 Subject: [PATCH 17/20] feat(annotations): add orcid identifier --- pyneuroml/annotations.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/pyneuroml/annotations.py b/pyneuroml/annotations.py index 3ce9a69d..38b279ab 100644 --- a/pyneuroml/annotations.py +++ b/pyneuroml/annotations.py @@ -32,20 +32,13 @@ # From https://doi.org/10.1515/jib-2021-0020 (page 3) # rdf, foaf, dc already included in rdflib NAMESPACES_MAP = { - "orcid": "https://orcid.org/", "bqmodel": "http://biomodels.net/model-qualifiers/", "bqbiol": "http://biomodels.net/biology-qualifiers/", - "pubmed": "https://identifiers.org/pubmed:", - "NCBI_Taxon": "https://identifiers.org/taxonomy:", - "biomod": "https://identifiers.org/biomodels.db:", - "chebi": "https://identifiers.org/CHEBI:", - "uniprot": "https://identifiers.org/uniprot:", - "obp": "https://identifiers.org/opb:", - "fma": "https://identifiers.org/FMA:", "semsim": "http://bime.uw.edu/semsim/", "prism": "http://prismstandard.org/namespaces/basic/2.0/", "collex": "http://www.collex.org/schema", "scoro": "http://purl.org/spar/scoro", + "orcid": "https://orcid.org/", } @@ -55,6 +48,7 @@ BQBIOL = Namespace(NAMESPACES_MAP["bqbiol"]) BQMODEL = Namespace(NAMESPACES_MAP["bqmodel"]) SCORO = Namespace(NAMESPACES_MAP["scoro"]) +ORCID = Namespace(NAMESPACES_MAP["orcid"]) class Annotation(object): @@ -88,6 +82,7 @@ def __init__(self): self.doc.bind("bqbiol", BQBIOL) self.doc.bind("bqmodel", BQMODEL) self.doc.bind("scoro", SCORO) + self.doc.bind("orcid", ORCID) for k, v in self.P_MAP_EXTRA.items(): self.doc.bind(f"{v}:{k}", f"v.upper()/{k}") @@ -495,9 +490,12 @@ def _add_humans( # other fields for idf, label in info.items(): - try: - foaf_type = getattr(FOAF, label) - except AttributeError: + if label == "orcid": + foaf_type = ORCID.id + else: + foaf_type = getattr(FOAF, label, None) + + if foaf_type is None: logger.info("Not a FOAF attribute, using DC.identifier") foaf_type = DC.identifier self.doc.add((top_node, foaf_type, _URIRef_or_Literal(idf))) @@ -515,9 +513,12 @@ def _add_humans( # individual nodes for details self.doc.add((ref, FOAF.name, Literal(name))) for idf, label in info.items(): - try: - foaf_type = getattr(FOAF, label) - except AttributeError: + if label == "orcid": + foaf_type = ORCID.id + else: + foaf_type = getattr(FOAF, label, None) + + if foaf_type is None: logger.info("Not a FOAF attribute, using DC.identifier") foaf_type = DC.identifier self.doc.add((ref, foaf_type, _URIRef_or_Literal(idf))) From d0a544ac578ad1c73c421c857d6e53a10934fdac Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Fri, 3 May 2024 23:34:19 +0100 Subject: [PATCH 18/20] test(annotations): test orcid --- tests/test_annotations.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_annotations.py b/tests/test_annotations.py index de78d569..26f1a0f9 100644 --- a/tests/test_annotations.py +++ b/tests/test_annotations.py @@ -45,6 +45,7 @@ class TestAnnotations(BaseTestCase): "John Doe": { "https://someurl.com": "homepage", "https://anotherurl": "github", + "https://orcid.org/0000-0001-7568-7167": "orcid", }, "Jane Smith": {}, }, From 75d7f8c14cbf422e69f05d6f8b9b45b4696bd0fe Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Wed, 8 May 2024 15:32:11 +0100 Subject: [PATCH 19/20] chore(annotations): update docstrings --- pyneuroml/annotations.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pyneuroml/annotations.py b/pyneuroml/annotations.py index 38b279ab..8f36bc7f 100644 --- a/pyneuroml/annotations.py +++ b/pyneuroml/annotations.py @@ -816,8 +816,12 @@ def _URIRef_or_Literal(astr: str) -> typing.Union[URIRef, Literal]: def create_annotation(*args, **kwargs): """Wrapper around the Annotations.create_annotation method. - :param **kwargs: TODO - :returns: TODO + Please see py:func:`Annotations.create_annotations` for detailed + documentation. + + .. versionadded:: 1.2.13 + + :returns: annotation string """ new_annotation = Annotation() @@ -826,6 +830,12 @@ def create_annotation(*args, **kwargs): def extract_annotations(nml2_file: str): """Wrapper around the Annotations.extract_annotations method. + + Please see py:func:`Annotations.extract_annotations` for detailed + documentation. + + .. versionadded:: 1.2.13 + :param nml2_file: name of NeuroML2 file to parse :type nml2_file: str :returns: dictionaries with annotations information From bb5dbfb3e131a9f7e178ba555c98df6668a040ed Mon Sep 17 00:00:00 2001 From: "Ankur Sinha (Ankur Sinha Gmail)" Date: Wed, 8 May 2024 15:45:05 +0100 Subject: [PATCH 20/20] chore(annotations): fix doc strings [skip ci] --- pyneuroml/annotations.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pyneuroml/annotations.py b/pyneuroml/annotations.py index 8f36bc7f..cd325416 100644 --- a/pyneuroml/annotations.py +++ b/pyneuroml/annotations.py @@ -209,7 +209,9 @@ def create_annotation( select what style they want to use for the annotation. :type annotation_style: str :param serialization_format: format to serialize in using `rdflib.serialize` + See: https://rdflib.readthedocs.io/en/stable/plugin_serializers.html + :type serialization_format: str :param xml_header: toggle inclusion of xml header if serializing in xml format :type xml_header: bool @@ -256,13 +258,14 @@ def create_annotation( :param citations: related citations :type citations: dict(str, str) :param authors: authors + This can either be: - - a set: {"Author A", "Author B"} + - a set: :code:`{"Author A", "Author B"}` - a dictionary where the keys are author names and values are dictionaries of more metadata: - {"Author A": {"https://../": "accountname", "..": ".."}} + :code:`{"Author A": {"https://../": "accountname", "..": ".."}}` The inner dictionary should have the reference or literal as key, and can take a "label", which can be any of the FOAF attributes: @@ -584,11 +587,11 @@ def extract_annotations_from_string( def parse_rdf( self, annotation_element: etree.Element ) -> typing.Dict[str, typing.Any]: - """Parse RDF from an element. + """Parse RDF from an :code:`` element. Note that this is not a general purpose RDF parser. It is a specific parser for the RDF annotations that are used in - NeuroML (which the py:func:`create_annotation` method can write). + NeuroML (which the :py:func:`pyneuroml.annotations.Annotation.create_annotation` method can write). :param annotation_element: an element :type annotation_element: etree.Element @@ -816,7 +819,7 @@ def _URIRef_or_Literal(astr: str) -> typing.Union[URIRef, Literal]: def create_annotation(*args, **kwargs): """Wrapper around the Annotations.create_annotation method. - Please see py:func:`Annotations.create_annotations` for detailed + Please see :py:func:`pyneuroml.annotations.Annotation.create_annotation` for detailed documentation. .. versionadded:: 1.2.13 @@ -831,7 +834,7 @@ def create_annotation(*args, **kwargs): def extract_annotations(nml2_file: str): """Wrapper around the Annotations.extract_annotations method. - Please see py:func:`Annotations.extract_annotations` for detailed + Please see :py:func:`pyneuroml.annotations.Annotation.extract_annotations` for detailed documentation. .. versionadded:: 1.2.13