Skip to content

Cleanup public interface #589

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 24 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,33 @@

<img src="https://github.com/tis-lab/closed-illustrations/raw/master/logos/sssom-logos/sssom_logo_black_banner.png" />

SSSOM (Simple Standard for Sharing Ontology Mappings) is a TSV and RDF/OWL standard for ontology mappings
A Python library and command line interface (CLI) for working with
[SSSOM (Simple Standard for Sharing Ontology Mappings)](https://github.com/mapping-commons/sssom).

```
WARNING:
The export formats (json, rdf) of sssom-py are not yet finalised!
Please expect changes in future releases!
```
## Getting Started

An SSSOM TSV file can be parsed as follows:

```python
import sssom

See https://github.com/OBOFoundry/SSSOM
# other SSSOM files can be found on https://mapping-commons.github.io
url = "https://raw.githubusercontent.com/mapping-commons/mh_mapping_initiative/master/mappings/mp_hp_eye_impc.sssom.tsv"

# TSV can be parsed into a mapping set dataframe object,
# which includes a pandas DataFrame, a curies.Converter,
# and metadata
msdf = sssom.parse_tsv(url)

# SSSOM comes with several "write" functions
sssom.write_tsv(msdf, "test.tsv")
sssom.write_json(msdf, "test.json")
sssom.write_owl(msdf, "test.owl")
sssom.write_rdf(msdf, "test.ttl")
```

This is a python library and command line toolkit for working with SSSOM. It also defines a schema for SSSOM.
> [!WARNING]
> The export formats (json, rdf) of sssom-py are not yet finalised! Expect changes in future releases.

## Documentation

Expand Down
3 changes: 2 additions & 1 deletion src/sssom/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
dataframe_to_ptable,
filter_redundant_rows,
group_mappings,
parse,
reconcile_prefix_and_data,
)

from .constants import generate_mapping_set_id, get_default_metadata # noqa:401
from .parsers import parse_tsv # noqa:401
from .writers import write_json, write_owl, write_rdf, write_tsv # noqa:401
6 changes: 5 additions & 1 deletion src/sssom/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import uuid
from enum import Enum
from functools import cached_property, lru_cache
from typing import Any, Dict, List, Literal, Set
from typing import Any, Dict, List, Literal, Set, TextIO, Union

import importlib_resources
import yaml
Expand Down Expand Up @@ -316,3 +316,7 @@ def get_default_metadata() -> MetadataType:
"mapping_set_id": generate_mapping_set_id(),
"license": DEFAULT_LICENSE,
}


#: A hint for functions that can take a path or an IO
PathOrIO = Union[str, pathlib.Path, TextIO]
48 changes: 38 additions & 10 deletions src/sssom/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
SUBJECT_SOURCE,
SUBJECT_SOURCE_ID,
MetadataType,
PathOrIO,
_get_sssom_schema_object,
get_default_metadata,
)
Expand All @@ -76,7 +77,7 @@
# Parsers (from file)


def _open_input(input: Union[str, Path, TextIO]) -> io.StringIO:
def _open_input(input: PathOrIO) -> io.StringIO:
"""Transform a URL, a filepath (from pathlib), or a string (with file contents) to a StringIO object.

:param input: A string representing a URL, a filepath, or file contents,
Expand Down Expand Up @@ -284,12 +285,33 @@ def _get_converter_pop_replace_curie_map(sssom_metadata):


def parse_sssom_table(
file_path: Union[str, Path, TextIO],
file_path: PathOrIO,
prefix_map: ConverterHint = None,
meta: Optional[MetadataType] = None,
**kwargs,
*,
strict: bool = False,
**kwargs: Any,
) -> MappingSetDataFrame:
"""Parse a TSV to a :class:`MappingSetDocument` to a :class:`MappingSetDataFrame`."""
"""Parse a SSSOM TSV.

:param file_path:
A file path, URL, or I/O object that contains SSSOM encoded in TSV
:param prefix_map:
A prefix map or :class:`curies.Converter` used to validate prefixes,
CURIEs, and IRIs appearing in the SSSOM TSV
:param meta:
Additional document-level metadata for the SSSOM TSV document that is not
contained within the document itself. For example, this may come from a
companion SSSOM YAML file.
:param strict:
If true, will fail parsing for undefined prefixes, CURIEs, or IRIs
:param kwargs:
Additional keyword arguments (unhandled)
:returns:
A parsed dataframe wrapper object
"""
if kwargs:
logging.warning("unhandled keyword arguments passed: %s", kwargs)
if isinstance(file_path, Path) or isinstance(file_path, str):
raise_for_bad_path(file_path)
stream: io.StringIO = _open_input(file_path)
Expand All @@ -301,7 +323,7 @@ def parse_sssom_table(
is_valid_built_in_prefixes = _check_redefined_builtin_prefixes(sssom_metadata, meta, prefix_map)
is_valid_metadata = _is_irregular_metadata([sssom_metadata, meta])

if kwargs.get("strict"):
if strict:
_fail_in_strict_parsing_mode(is_valid_built_in_prefixes, is_valid_metadata)

# The priority order for combining prefix maps are:
Expand Down Expand Up @@ -334,8 +356,11 @@ def parse_sssom_table(
return msdf


parse_tsv = parse_sssom_table


def parse_sssom_rdf(
file_path: str,
file_path: Union[str, Path],
prefix_map: ConverterHint = None,
meta: Optional[MetadataType] = None,
serialisation=SSSOM_DEFAULT_RDF_SERIALISATION,
Expand Down Expand Up @@ -373,7 +398,10 @@ def parse_sssom_rdf(


def parse_sssom_json(
file_path: str, prefix_map: ConverterHint = None, meta: Optional[MetadataType] = None, **kwargs
file_path: Union[str, Path],
prefix_map: ConverterHint = None,
meta: Optional[MetadataType] = None,
**kwargs,
) -> MappingSetDataFrame:
"""Parse a TSV to a :class:`MappingSetDocument` to a :class:`MappingSetDataFrame`."""
raise_for_bad_path(file_path)
Expand Down Expand Up @@ -407,7 +435,7 @@ def parse_sssom_json(


def parse_obographs_json(
file_path: str,
file_path: Union[str, Path],
prefix_map: ConverterHint = None,
meta: Optional[MetadataType] = None,
mapping_predicates: Optional[List[str]] = None,
Expand Down Expand Up @@ -491,7 +519,7 @@ def _get_mapping_dict(


def parse_alignment_xml(
file_path: str,
file_path: Union[str, Path],
prefix_map: ConverterHint = None,
meta: Optional[MetadataType] = None,
mapping_predicates: Optional[List[str]] = None,
Expand All @@ -501,7 +529,7 @@ def parse_alignment_xml(

converter, meta = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta)
logging.info("Loading from alignment API")
xmldoc = minidom.parse(file_path)
xmldoc = minidom.parse(Path(file_path).resolve().as_posix())
msdf = from_alignment_minidom(
xmldoc,
prefix_map=converter,
Expand Down
6 changes: 0 additions & 6 deletions src/sssom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,12 +399,6 @@ class MappingSetDiff:
"""


def parse(filename: Union[str, Path]) -> pd.DataFrame:
    """Parse an SSSOM TSV file into a pandas DataFrame.

    Lines starting with ``#`` (the embedded metadata block) are skipped.

    :param filename: A path, URL, or buffer pointing at a tab-separated file
    :return: A pandas DataFrame containing the table body
    """
    # Lazy %-style formatting defers message construction until needed;
    # the previous f-string had no placeholder and never logged the target.
    logging.info("Parsing %s", filename)
    return pd.read_csv(filename, sep="\t", comment="#")


def collapse(df: pd.DataFrame) -> pd.DataFrame:
"""Collapse rows with same S/P/O and combines confidence."""
df2 = df.groupby([SUBJECT_ID, PREDICATE_ID, OBJECT_ID])[CONFIDENCE].apply(max).reset_index()
Expand Down
67 changes: 47 additions & 20 deletions src/sssom/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

import json
import logging as _logging
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, TextIO, Tuple, Union
from typing import Any, Callable, Dict, Generator, List, Optional, TextIO, Tuple, Union

import pandas as pd
import yaml
Expand All @@ -18,7 +19,7 @@

from sssom.validators import check_all_prefixes_in_curie_map

from .constants import CURIE_MAP, SCHEMA_YAML, SSSOM_URI_PREFIX
from .constants import CURIE_MAP, SCHEMA_YAML, SSSOM_URI_PREFIX, PathOrIO
from .context import _load_sssom_context
from .parsers import to_mapping_set_document
from .util import (
Expand Down Expand Up @@ -47,12 +48,21 @@
MSDFWriter = Callable[[MappingSetDataFrame, TextIO], None]


@contextmanager
def _open_text_writer(xx: PathOrIO) -> Generator[TextIO, None, None]:
if isinstance(xx, (str, Path)):
with open(xx, "w") as file:
yield file
else:
yield xx


def write_table(
msdf: MappingSetDataFrame,
file: TextIO,
file: PathOrIO,
embedded_mode: bool = True,
serialisation="tsv",
sort=False,
serialisation: str = "tsv",
sort: bool = False,
) -> None:
"""Write a mapping set dataframe to the file as a table."""
sep = _get_separator(serialisation)
Expand All @@ -68,20 +78,31 @@ def write_table(
lines = [f"# {line}" for line in lines if line != ""]
s = msdf.df.to_csv(sep=sep, index=False).rstrip("\n")
lines = lines + [s]
for line in lines:
print(line, file=file)
with _open_text_writer(file) as fh:
for line in lines:
print(line, file=fh)
else:
if isinstance(file, (str, Path)):
yml_filepath = Path(file).with_suffix(".yaml")
else:
yml_filepath = Path(file.name.replace("tsv", "yaml"))

# Export MSDF as tsv
msdf.df.to_csv(file, sep=sep, index=False)
# Export Metadata as yaml
yml_filepath = file.name.replace("tsv", "yaml")
with open(yml_filepath, "w") as y:
yaml.safe_dump(meta, y)


def write_tsv(
    msdf: MappingSetDataFrame, path: PathOrIO, embedded_mode: bool = True, sort: bool = False
) -> None:
    """Serialize a mapping set dataframe as SSSOM TSV.

    Thin convenience wrapper around :func:`write_table` with the
    serialisation fixed to ``"tsv"``.

    :param msdf: The mapping set dataframe to write
    :param path: A file path or a writable text stream
    :param embedded_mode: Passed through to :func:`write_table`; presumably
        controls whether metadata is embedded as ``#``-prefixed header lines
        or written to a companion YAML file — confirm against write_table
    :param sort: Passed through to :func:`write_table`
    """
    write_table(msdf, path, embedded_mode=embedded_mode, sort=sort, serialisation="tsv")


def write_rdf(
msdf: MappingSetDataFrame,
file: TextIO,
file: PathOrIO,
serialisation: Optional[str] = None,
) -> None:
"""Write a mapping set dataframe to the file as RDF."""
Expand All @@ -97,17 +118,21 @@ def write_rdf(
check_all_prefixes_in_curie_map(msdf)
graph = to_rdf_graph(msdf=msdf)
t = graph.serialize(format=serialisation, encoding="utf-8")
print(t.decode(), file=file)
with _open_text_writer(file) as fh:
print(t.decode(), file=fh)


def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json") -> None:
def write_json(msdf: MappingSetDataFrame, output: PathOrIO, serialisation="json") -> None:
"""Write a mapping set dataframe to the file as JSON.

:param msdf: A mapping set dataframe
:param output: A path or write-supported file object to write JSON to
:param serialisation: The JSON format to use. Supported formats are:
- fhir_json: Outputs JSON in FHIR ConceptMap format (https://fhir-ru.github.io/conceptmap.html)

- ``fhir_json``: Outputs JSON in FHIR ConceptMap format (https://fhir-ru.github.io/conceptmap.html)
https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_fhir_json
- json: Outputs to SSSOM JSON https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_json
- ontoportal_json: Outputs JSON in Ontoportal format (https://ontoportal.org/)
- ``json``: Outputs to SSSOM JSON https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_json
- ``ontoportal_json``: Outputs JSON in Ontoportal format (https://ontoportal.org/)
https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_ontoportal_json
"""
func_map: Dict[str, Callable] = {
Expand All @@ -121,11 +146,12 @@ def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json")
)
func: Callable = func_map[serialisation]
data = func(msdf)
json.dump(data, output, indent=2)
with _open_text_writer(output) as fh:
json.dump(data, fh, indent=2)


@deprecated(deprecated_in="0.4.7", details="Use write_json() instead")
def write_fhir_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="fhir_json") -> None:
def write_fhir_json(msdf: MappingSetDataFrame, output: PathOrIO, serialisation="fhir_json") -> None:
"""Write a mapping set dataframe to the file as FHIR ConceptMap JSON."""
if serialisation != "fhir_json":
raise ValueError(
Expand All @@ -136,7 +162,7 @@ def write_fhir_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="fh

@deprecated(deprecated_in="0.4.7", details="Use write_json() instead")
def write_ontoportal_json(
msdf: MappingSetDataFrame, output: TextIO, serialisation: str = "ontoportal_json"
msdf: MappingSetDataFrame, output: PathOrIO, serialisation: str = "ontoportal_json"
) -> None:
"""Write a mapping set dataframe to the file as the ontoportal mapping JSON model."""
if serialisation != "ontoportal_json":
Expand All @@ -148,7 +174,7 @@ def write_ontoportal_json(

def write_owl(
msdf: MappingSetDataFrame,
file: TextIO,
file: PathOrIO,
serialisation=SSSOM_DEFAULT_RDF_SERIALISATION,
) -> None:
"""Write a mapping set dataframe to the file as OWL."""
Expand All @@ -161,7 +187,8 @@ def write_owl(

graph = to_owl_graph(msdf)
t = graph.serialize(format=serialisation, encoding="utf-8")
print(t.decode(), file=file)
with _open_text_writer(file) as fh:
print(t.decode(), file=fh)


# Converters
Expand Down
10 changes: 9 additions & 1 deletion tests/test_collapse.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
"""Test various grouping functionalities."""

import unittest
from pathlib import Path
from typing import Union

import pandas as pd

from sssom.parsers import parse_sssom_table
from sssom.util import (
Expand All @@ -9,12 +13,16 @@
dataframe_to_ptable,
filter_redundant_rows,
group_mappings,
parse,
reconcile_prefix_and_data,
)
from tests.constants import data_dir


def parse(filename: Union[str, Path]) -> pd.DataFrame:
    """Load a tab-separated mapping file into a pandas frame.

    Rows beginning with ``#`` (embedded metadata) are ignored.
    """
    frame = pd.read_csv(filename, comment="#", sep="\t")
    return frame


class TestCollapse(unittest.TestCase):
"""Test various grouping functionalities."""

Expand Down
Loading
Loading