Skip to content

Cleanup public interface #589

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 24 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,33 @@

<img src="https://github.com/tis-lab/closed-illustrations/raw/master/logos/sssom-logos/sssom_logo_black_banner.png" />

SSSOM (Simple Standard for Sharing Ontology Mappings) is a TSV and RDF/OWL standard for ontology mappings
A Python library and command line interface (CLI) for working with
[SSSOM (Simple Standard for Sharing Ontology Mappings)](https://github.com/mapping-commons/sssom).

```
WARNING:
The export formats (json, rdf) of sssom-py are not yet finalised!
Please expect changes in future releases!
```
## Getting Started

An SSSOM TSV file can be parsed as follows:

```python
import sssom

See https://github.com/OBOFoundry/SSSOM
# other SSSOM files can be found on https://mapping-commons.github.io
url = "https://raw.githubusercontent.com/mapping-commons/mh_mapping_initiative/master/mappings/mp_hp_eye_impc.sssom.tsv"

# TSV can be parsed into a mapping set dataframe object,
# which includes a pandas DataFrame, a curies.Converter,
# and metadata
msdf = sssom.parse_tsv(url)

# SSSOM comes with several "write" functions
sssom.write_tsv(msdf, "test.tsv")
sssom.write_json(msdf, "test.json")
sssom.write_owl(msdf, "test.owl")
sssom.write_rdf(msdf, "test.ttl")
```

This is a python library and command line toolkit for working with SSSOM. It also defines a schema for SSSOM.
> [!WARNING]
> The export formats (json, rdf) of sssom-py are not yet finalised! Expect changes in future releases.

## Documentation

Expand Down
3 changes: 2 additions & 1 deletion src/sssom/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
dataframe_to_ptable,
filter_redundant_rows,
group_mappings,
parse,
reconcile_prefix_and_data,
)

from .constants import generate_mapping_set_id, get_default_metadata # noqa:401
from .parsers import parse_tsv # noqa:401
from .writers import write_json, write_owl, write_rdf, write_tsv # noqa:401
6 changes: 5 additions & 1 deletion src/sssom/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import uuid
from enum import Enum
from functools import cached_property, lru_cache
from typing import Any, Dict, List, Literal, Set
from typing import Any, Dict, List, Literal, Set, TextIO, Union

import importlib_resources
import yaml
Expand Down Expand Up @@ -316,3 +316,7 @@ def get_default_metadata() -> MetadataType:
"mapping_set_id": generate_mapping_set_id(),
"license": DEFAULT_LICENSE,
}


#: A hint for functions that can take a path or an IO
PathOrIO = Union[str, pathlib.Path, TextIO]
48 changes: 38 additions & 10 deletions src/sssom/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
SUBJECT_SOURCE,
SUBJECT_SOURCE_ID,
MetadataType,
PathOrIO,
_get_sssom_schema_object,
get_default_metadata,
)
Expand All @@ -76,7 +77,7 @@
# Parsers (from file)


def _open_input(input: Union[str, Path, TextIO]) -> io.StringIO:
def _open_input(input: PathOrIO) -> io.StringIO:
"""Transform a URL, a filepath (from pathlib), or a string (with file contents) to a StringIO object.

:param input: A string representing a URL, a filepath, or file contents,
Expand Down Expand Up @@ -284,12 +285,33 @@ def _get_converter_pop_replace_curie_map(sssom_metadata):


def parse_sssom_table(
file_path: Union[str, Path, TextIO],
file_path: PathOrIO,
prefix_map: ConverterHint = None,
meta: Optional[MetadataType] = None,
**kwargs,
*,
strict: bool = False,
**kwargs: Any,
) -> MappingSetDataFrame:
"""Parse a TSV to a :class:`MappingSetDocument` to a :class:`MappingSetDataFrame`."""
"""Parse a SSSOM TSV.

:param file_path:
A file path, URL, or I/O object that contains SSSOM encoded in TSV
:param prefix_map:
A prefix map or :class:`curies.Converter` used to validate prefixes,
CURIEs, and IRIs appearing in the SSSOM TSV
:param meta:
Additional document-level metadata for the SSSOM TSV document that is not
contained within the document itself. For example, this may come from a
companion SSSOM YAML file.
:param strict:
If true, will fail parsing for undefined prefixes, CURIEs, or IRIs
:param kwargs:
Additional keyword arguments (unhandled)
:returns:
A parsed dataframe wrapper object
"""
if kwargs:
logging.warning("unhandled keyword arguments passed: %s", kwargs)
if isinstance(file_path, Path) or isinstance(file_path, str):
raise_for_bad_path(file_path)
stream: io.StringIO = _open_input(file_path)
Expand All @@ -301,7 +323,7 @@ def parse_sssom_table(
is_valid_built_in_prefixes = _check_redefined_builtin_prefixes(sssom_metadata, meta, prefix_map)
is_valid_metadata = _is_irregular_metadata([sssom_metadata, meta])

if kwargs.get("strict"):
if strict:
_fail_in_strict_parsing_mode(is_valid_built_in_prefixes, is_valid_metadata)

# The priority order for combining prefix maps are:
Expand Down Expand Up @@ -334,8 +356,11 @@ def parse_sssom_table(
return msdf


parse_tsv = parse_sssom_table


def parse_sssom_rdf(
file_path: str,
file_path: Union[str, Path],
prefix_map: ConverterHint = None,
meta: Optional[MetadataType] = None,
serialisation=SSSOM_DEFAULT_RDF_SERIALISATION,
Expand Down Expand Up @@ -373,7 +398,10 @@ def parse_sssom_rdf(


def parse_sssom_json(
file_path: str, prefix_map: ConverterHint = None, meta: Optional[MetadataType] = None, **kwargs
file_path: Union[str, Path],
prefix_map: ConverterHint = None,
meta: Optional[MetadataType] = None,
**kwargs,
) -> MappingSetDataFrame:
"""Parse a TSV to a :class:`MappingSetDocument` to a :class:`MappingSetDataFrame`."""
raise_for_bad_path(file_path)
Expand Down Expand Up @@ -407,7 +435,7 @@ def parse_sssom_json(


def parse_obographs_json(
file_path: str,
file_path: Union[str, Path],
prefix_map: ConverterHint = None,
meta: Optional[MetadataType] = None,
mapping_predicates: Optional[List[str]] = None,
Expand Down Expand Up @@ -491,7 +519,7 @@ def _get_mapping_dict(


def parse_alignment_xml(
file_path: str,
file_path: Union[str, Path],
prefix_map: ConverterHint = None,
meta: Optional[MetadataType] = None,
mapping_predicates: Optional[List[str]] = None,
Expand All @@ -501,7 +529,7 @@ def parse_alignment_xml(

converter, meta = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta)
logging.info("Loading from alignment API")
xmldoc = minidom.parse(file_path)
xmldoc = minidom.parse(Path(file_path).resolve().as_posix())
msdf = from_alignment_minidom(
xmldoc,
prefix_map=converter,
Expand Down
6 changes: 0 additions & 6 deletions src/sssom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,12 +399,6 @@ class MappingSetDiff:
"""


def parse(filename: Union[str, Path]) -> pd.DataFrame:
    """Parse an SSSOM TSV file into a pandas DataFrame.

    Lines starting with ``#`` (the embedded metadata block) are skipped.

    :param filename: A path, URL, or buffer pointing at a tab-separated file
    :return: A pandas DataFrame containing the table body
    """
    # Lazy %-style formatting defers message construction until needed;
    # the previous f-string had no placeholder and never logged the target.
    logging.info("Parsing %s", filename)
    return pd.read_csv(filename, sep="\t", comment="#")


def collapse(df: pd.DataFrame) -> pd.DataFrame:
"""Collapse rows with same S/P/O and combines confidence."""
df2 = df.groupby([SUBJECT_ID, PREDICATE_ID, OBJECT_ID])[CONFIDENCE].apply(max).reset_index()
Expand Down
67 changes: 47 additions & 20 deletions src/sssom/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

import json
import logging as _logging
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, TextIO, Tuple, Union
from typing import Any, Callable, Dict, Generator, List, Optional, TextIO, Tuple, Union

import pandas as pd
import yaml
Expand All @@ -18,7 +19,7 @@

from sssom.validators import check_all_prefixes_in_curie_map

from .constants import CURIE_MAP, SCHEMA_YAML, SSSOM_URI_PREFIX
from .constants import CURIE_MAP, SCHEMA_YAML, SSSOM_URI_PREFIX, PathOrIO
from .context import _load_sssom_context
from .parsers import to_mapping_set_document
from .util import (
Expand Down Expand Up @@ -47,12 +48,21 @@
MSDFWriter = Callable[[MappingSetDataFrame, TextIO], None]


@contextmanager
def _open_text_writer(xx: PathOrIO) -> Generator[TextIO, None, None]:
if isinstance(xx, (str, Path)):
with open(xx, "w") as file:
yield file
else:
yield xx


def write_table(
msdf: MappingSetDataFrame,
file: TextIO,
file: PathOrIO,
embedded_mode: bool = True,
serialisation="tsv",
sort=False,
serialisation: str = "tsv",
sort: bool = False,
) -> None:
"""Write a mapping set dataframe to the file as a table."""
sep = _get_separator(serialisation)
Expand All @@ -68,20 +78,31 @@ def write_table(
lines = [f"# {line}" for line in lines if line != ""]
s = msdf.df.to_csv(sep=sep, index=False).rstrip("\n")
lines = lines + [s]
for line in lines:
print(line, file=file)
with _open_text_writer(file) as fh:
for line in lines:
print(line, file=fh)
else:
if isinstance(file, (str, Path)):
yml_filepath = Path(file).with_suffix(".yaml")
else:
yml_filepath = Path(file.name.replace("tsv", "yaml"))

# Export MSDF as tsv
msdf.df.to_csv(file, sep=sep, index=False)
# Export Metadata as yaml
yml_filepath = file.name.replace("tsv", "yaml")
with open(yml_filepath, "w") as y:
yaml.safe_dump(meta, y)


def write_tsv(
    msdf: MappingSetDataFrame, path: PathOrIO, embedded_mode: bool = True, sort: bool = False
) -> None:
    """Serialize a mapping set dataframe as SSSOM TSV.

    Thin convenience wrapper around :func:`write_table` with the
    serialisation fixed to ``"tsv"``.

    :param msdf: The mapping set dataframe to write
    :param path: A file path or a writable text stream
    :param embedded_mode: Passed through to :func:`write_table`; presumably
        controls whether metadata is embedded as ``#``-prefixed header lines
        or written to a companion YAML file — confirm against write_table
    :param sort: Passed through to :func:`write_table`
    """
    write_table(msdf, path, embedded_mode=embedded_mode, sort=sort, serialisation="tsv")


def write_rdf(
msdf: MappingSetDataFrame,
file: TextIO,
file: PathOrIO,
serialisation: Optional[str] = None,
) -> None:
"""Write a mapping set dataframe to the file as RDF."""
Expand All @@ -97,17 +118,21 @@ def write_rdf(
check_all_prefixes_in_curie_map(msdf)
graph = to_rdf_graph(msdf=msdf)
t = graph.serialize(format=serialisation, encoding="utf-8")
print(t.decode(), file=file)
with _open_text_writer(file) as fh:
print(t.decode(), file=fh)


def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json") -> None:
def write_json(msdf: MappingSetDataFrame, output: PathOrIO, serialisation="json") -> None:
"""Write a mapping set dataframe to the file as JSON.

:param msdf: A mapping set dataframe
:param output: A path or write-supported file object to write JSON to
:param serialisation: The JSON format to use. Supported formats are:
- fhir_json: Outputs JSON in FHIR ConceptMap format (https://fhir-ru.github.io/conceptmap.html)

- ``fhir_json``: Outputs JSON in FHIR ConceptMap format (https://fhir-ru.github.io/conceptmap.html)
https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_fhir_json
- json: Outputs to SSSOM JSON https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_json
- ontoportal_json: Outputs JSON in Ontoportal format (https://ontoportal.org/)
- ``json``: Outputs to SSSOM JSON https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_json
- ``ontoportal_json``: Outputs JSON in Ontoportal format (https://ontoportal.org/)
https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_ontoportal_json
"""
func_map: Dict[str, Callable] = {
Expand All @@ -121,11 +146,12 @@ def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json")
)
func: Callable = func_map[serialisation]
data = func(msdf)
json.dump(data, output, indent=2)
with _open_text_writer(output) as fh:
json.dump(data, fh, indent=2)


@deprecated(deprecated_in="0.4.7", details="Use write_json() instead")
def write_fhir_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="fhir_json") -> None:
def write_fhir_json(msdf: MappingSetDataFrame, output: PathOrIO, serialisation="fhir_json") -> None:
"""Write a mapping set dataframe to the file as FHIR ConceptMap JSON."""
if serialisation != "fhir_json":
raise ValueError(
Expand All @@ -136,7 +162,7 @@ def write_fhir_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="fh

@deprecated(deprecated_in="0.4.7", details="Use write_json() instead")
def write_ontoportal_json(
msdf: MappingSetDataFrame, output: TextIO, serialisation: str = "ontoportal_json"
msdf: MappingSetDataFrame, output: PathOrIO, serialisation: str = "ontoportal_json"
) -> None:
"""Write a mapping set dataframe to the file as the ontoportal mapping JSON model."""
if serialisation != "ontoportal_json":
Expand All @@ -148,7 +174,7 @@ def write_ontoportal_json(

def write_owl(
msdf: MappingSetDataFrame,
file: TextIO,
file: PathOrIO,
serialisation=SSSOM_DEFAULT_RDF_SERIALISATION,
) -> None:
"""Write a mapping set dataframe to the file as OWL."""
Expand All @@ -161,7 +187,8 @@ def write_owl(

graph = to_owl_graph(msdf)
t = graph.serialize(format=serialisation, encoding="utf-8")
print(t.decode(), file=file)
with _open_text_writer(file) as fh:
print(t.decode(), file=fh)


# Converters
Expand Down
10 changes: 9 additions & 1 deletion tests/test_collapse.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
"""Test various grouping functionalities."""

import unittest
from pathlib import Path
from typing import Union

import pandas as pd

from sssom.parsers import parse_sssom_table
from sssom.util import (
Expand All @@ -9,12 +13,16 @@
dataframe_to_ptable,
filter_redundant_rows,
group_mappings,
parse,
reconcile_prefix_and_data,
)
from tests.constants import data_dir


def parse(filename: Union[str, Path]) -> pd.DataFrame:
    """Load a tab-separated mapping file into a pandas frame.

    Rows beginning with ``#`` (embedded metadata) are ignored.
    """
    frame = pd.read_csv(filename, comment="#", sep="\t")
    return frame


class TestCollapse(unittest.TestCase):
"""Test various grouping functionalities."""

Expand Down
Loading
Loading