From 8ad36058c6e3c082da965eeae3256f0ac1f4ed66 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Fri, 23 May 2025 10:44:49 +0200
Subject: [PATCH 01/19] Update public API

---
 README.md              | 31 +++++++++++++++++++++++--------
 src/sssom/__init__.py  |  3 ++-
 src/sssom/parsers.py   | 25 ++++++++++++++++++++++---
 src/sssom/util.py      |  6 ------
 tests/test_collapse.py | 10 +++++++++-
 5 files changed, 56 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 30c22a57..93837377 100644
--- a/README.md
+++ b/README.md
@@ -21,17 +21,32 @@
 
-SSSOM (Simple Standard for Sharing Ontology Mappings) is a TSV and RDF/OWL standard for ontology mappings
+A Python library and command line interface (CLI) for working with
+[SSSOM (Simple Standard for Sharing Ontology Mappings)](https://github.com/mapping-commons/sssom).
 
-```
-WARNING:
-   The export formats (json, rdf) of sssom-py are not yet finalised!
-   Please expect changes in future releases!
-```
+## Getting Started
+
+A SSSOM TSV can be parsed with
+
+```python
+import sssom
 
-See https://github.com/OBOFoundry/SSSOM
+# other SSSOM files can be found on https://mapping-commons.github.io
+url = "https://raw.githubusercontent.com/mapping-commons/mh_mapping_initiative/master/mappings/mp_hp_eye_impc.sssom.tsv"
+
+# TSV can be parsed into a mapping set dataframe object,
+# which includes a pandas DataFrame, a curies.Converter,
+# and metadata
+msdf = sssom.parse_tsv(url)
+
+# SSSOM comes with several "write" functions
+sssom.write_json(msdf, "test.json")
+sssom.write_owl(msdf, "test.owl")
+sssom.write_rdf(msdf, "test.ttl")
+```
 
-This is a python library and command line toolkit for working with SSSOM. It also defines a schema for SSSOM.
+> [!WARNING]
+> The export formats (json, rdf) of sssom-py are not yet finalised! Expect changes in future releases.
 
 ## Documentation
 
diff --git a/src/sssom/__init__.py b/src/sssom/__init__.py
index 08f4fafc..ec1d032f 100644
--- a/src/sssom/__init__.py
+++ b/src/sssom/__init__.py
@@ -19,8 +19,9 @@
     dataframe_to_ptable,
     filter_redundant_rows,
     group_mappings,
-    parse,
     reconcile_prefix_and_data,
 )
 from .constants import generate_mapping_set_id, get_default_metadata  # noqa:401
+from .parsers import parse_tsv  # noqa:401
+from .writers import write_json, write_owl, write_rdf, write_tsv  # noqa:401
diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py
index b95c6573..ae5c1be3 100644
--- a/src/sssom/parsers.py
+++ b/src/sssom/parsers.py
@@ -287,9 +287,25 @@ def parse_sssom_table(
     file_path: Union[str, Path, TextIO],
     prefix_map: ConverterHint = None,
     meta: Optional[MetadataType] = None,
-    **kwargs,
+    *,
+    strict: bool = False,
 ) -> MappingSetDataFrame:
-    """Parse a TSV to a :class:`MappingSetDocument` to a :class:`MappingSetDataFrame`."""
+    """Parse a SSSOM TSV.
+
+    :param file_path:
+        A file path, URL, or I/O object that contains SSSOM encoded in TSV
+    :param prefix_map:
+        A prefix map or :class:`curies.Converter` used to validate prefixes,
+        CURIEs, and IRIs appearing in the SSSOM TSV
+    :param meta:
+        Additional document-level metadata for the SSSOM TSV document that is not
+        contained within the document itself. For example, this may come from a
+        companion SSSOM YAML file.
+    :param strict:
+        If true, will fail parsing for undefined prefixes, CURIEs, or IRIs
+    :returns:
+        A parsed dataframe wrapper object
+    """
     if isinstance(file_path, Path) or isinstance(file_path, str):
         raise_for_bad_path(file_path)
     stream: io.StringIO = _open_input(file_path)
@@ -301,7 +317,7 @@ def parse_sssom_table(
     is_valid_built_in_prefixes = _check_redefined_builtin_prefixes(sssom_metadata, meta, prefix_map)
     is_valid_metadata = _is_irregular_metadata([sssom_metadata, meta])
 
-    if kwargs.get("strict"):
+    if strict:
         _fail_in_strict_parsing_mode(is_valid_built_in_prefixes, is_valid_metadata)
 
     # The priority order for combining prefix maps are:
@@ -334,6 +350,9 @@ def parse_sssom_table(
     return msdf
 
 
+parse_tsv = parse_sssom_table
+
+
 def parse_sssom_rdf(
     file_path: str,
     prefix_map: ConverterHint = None,
diff --git a/src/sssom/util.py b/src/sssom/util.py
index 699e1ed0..7c5dee58 100644
--- a/src/sssom/util.py
+++ b/src/sssom/util.py
@@ -399,12 +399,6 @@ class MappingSetDiff:
     """
 
 
-def parse(filename: Union[str, Path]) -> pd.DataFrame:
-    """Parse a TSV to a pandas frame."""
-    logging.info(f"Parsing {filename}")
-    return pd.read_csv(filename, sep="\t", comment="#")
-
-
 def collapse(df: pd.DataFrame) -> pd.DataFrame:
     """Collapse rows with same S/P/O and combines confidence."""
     df2 = df.groupby([SUBJECT_ID, PREDICATE_ID, OBJECT_ID])[CONFIDENCE].apply(max).reset_index()
diff --git a/tests/test_collapse.py b/tests/test_collapse.py
index a366270c..9cde66d2 100644
--- a/tests/test_collapse.py
+++ b/tests/test_collapse.py
@@ -1,6 +1,10 @@
 """Test various grouping functionalities."""
 
 import unittest
+from pathlib import Path
+from typing import Union
+
+import pandas as pd
 
 from sssom.parsers import parse_sssom_table
 from sssom.util import (
@@ -9,12 +13,16 @@
     dataframe_to_ptable,
     filter_redundant_rows,
     group_mappings,
-    parse,
     reconcile_prefix_and_data,
 )
 from tests.constants import data_dir
 
 
+def parse(filename: Union[str, Path]) -> pd.DataFrame:
+    """Parse a TSV to a pandas frame."""
+    return pd.read_csv(filename, sep="\t", comment="#")
+
+
 class TestCollapse(unittest.TestCase):
     """Test various grouping functionalities."""
 
From 4cb86c95a7f10d8cf87185c8485cd3dcf8fe097a Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Fri, 23 May 2025 10:44:55 +0200
Subject: [PATCH 02/19] Update writers.py

---
 src/sssom/writers.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/sssom/writers.py b/src/sssom/writers.py
index 01917a30..4d9be045 100644
--- a/src/sssom/writers.py
+++ b/src/sssom/writers.py
@@ -51,8 +51,8 @@ def write_table(
     msdf: MappingSetDataFrame,
     file: TextIO,
     embedded_mode: bool = True,
-    serialisation="tsv",
-    sort=False,
+    serialisation: str = "tsv",
+    sort: bool = False,
 ) -> None:
     """Write a mapping set dataframe to the file as a table."""
     sep = _get_separator(serialisation)
@@ -79,6 +79,11 @@ def write_table(
             yaml.safe_dump(meta, y)
 
 
+def write_tsv(msdf: MappingSetDataFrame, path: str | Path | TextIO, sort: bool = False) -> None:
+    """Write a mapping set to a TSV file."""
+    raise NotImplementedError
+
+
 def write_rdf(
     msdf: MappingSetDataFrame,
     file: TextIO,
From 47061a7da20150d9dd43942f36ddd1b71770a8e9 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Fri, 23 May 2025 10:57:32 +0200
Subject: [PATCH 03/19] Enable passing paths to writers

---
 src/sssom/writers.py  | 61 +++++++++++++++++++++++++++++++------------
 tests/test_writers.py | 23 ++++++----------
 2 files changed, 53 insertions(+), 31 deletions(-)

diff --git a/src/sssom/writers.py b/src/sssom/writers.py
index 4d9be045..3e8714e4 100644
--- a/src/sssom/writers.py
+++ b/src/sssom/writers.py
@@ -49,7 +49,7 @@ def write_table(
     msdf: MappingSetDataFrame,
-    file: TextIO,
+    file: str | Path | TextIO,
     embedded_mode: bool = True,
     serialisation: str = "tsv",
     sort: bool = False,
 ) -> None:
     """Write a mapping set dataframe to the file as a table."""
@@ -68,13 +68,21 @@ def write_table(
         lines = [f"# {line}" for line in lines if line != ""]
         s = msdf.df.to_csv(sep=sep, index=False).rstrip("\n")
         lines = lines + [s]
-        for line in lines:
-            print(line, file=file)
+        if isinstance(file, str | Path):
+            with open(file, "w") as fh:
+                for line in lines:
+                    print(line, file=fh)
+        else:
+            for line in lines:
+                print(line, file=file)
     else:
+        if isinstance(file, str | Path):
+            yml_filepath = Path(file).with_suffix(".yaml")
+        else:
+            yml_filepath = file.name.replace("tsv", "yaml")
+
         # Export MSDF as tsv
         msdf.df.to_csv(file, sep=sep, index=False)
-        # Export Metadata as yaml
-        yml_filepath = file.name.replace("tsv", "yaml")
         with open(yml_filepath, "w") as y:
             yaml.safe_dump(meta, y)
 
@@ -86,7 +94,7 @@ def write_tsv(msdf: MappingSetDataFrame, path: str | Path | TextIO, sort: bool = False) -> None:
     """Write a mapping set to a TSV file."""
     raise NotImplementedError
 
 
 def write_rdf(
     msdf: MappingSetDataFrame,
-    file: TextIO,
+    file: str | Path | TextIO,
     serialisation: Optional[str] = None,
 ) -> None:
     """Write a mapping set dataframe to the file as RDF."""
@@ -102,17 +110,26 @@ def write_rdf(
     check_all_prefixes_in_curie_map(msdf)
     graph = to_rdf_graph(msdf=msdf)
     t = graph.serialize(format=serialisation, encoding="utf-8")
-    print(t.decode(), file=file)
+    if isinstance(file, str | Path):
+        with open(file, "w") as fh:
+            print(t.decode(), file=fh)
+    else:
+        print(t.decode(), file=file)
 
 
-def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json") -> None:
+def write_json(
+    msdf: MappingSetDataFrame, output: str | Path | TextIO, serialisation="json"
+) -> None:
     """Write a mapping set dataframe to the file as JSON.
 
+    :param msdf: A mapping set dataframe
+    :param output: A path or write-supported file object to write JSON to
     :param serialisation: The JSON format to use. Supported formats are:
-        - fhir_json: Outputs JSON in FHIR ConceptMap format (https://fhir-ru.github.io/conceptmap.html)
+
+        - ``fhir_json``: Outputs JSON in FHIR ConceptMap format (https://fhir-ru.github.io/conceptmap.html)
           https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_fhir_json
-        - json: Outputs to SSSOM JSON https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_json
-        - ontoportal_json: Outputs JSON in Ontoportal format (https://ontoportal.org/)
+        - ``json``: Outputs to SSSOM JSON https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_json
+        - ``ontoportal_json``: Outputs JSON in Ontoportal format (https://ontoportal.org/)
           https://mapping-commons.github.io/sssom-py/sssom.html#sssom.writers.to_ontoportal_json
     """
     func_map: Dict[str, Callable] = {
@@ -126,11 +143,18 @@ def write_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="json")
         )
     func: Callable = func_map[serialisation]
     data = func(msdf)
-    json.dump(data, output, indent=2)
+
+    if isinstance(output, str | Path):
+        with open(output, "w") as file:
+            json.dump(data, file, indent=2)
+    else:
+        json.dump(data, output, indent=2)
 
 
 @deprecated(deprecated_in="0.4.7", details="Use write_json() instead")
-def write_fhir_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="fhir_json") -> None:
+def write_fhir_json(
+    msdf: MappingSetDataFrame, output: str | Path | TextIO, serialisation="fhir_json"
+) -> None:
     """Write a mapping set dataframe to the file as FHIR ConceptMap JSON."""
     if serialisation != "fhir_json":
         raise ValueError(
@@ -141,7 +165,7 @@ def write_fhir_json(msdf: MappingSetDataFrame, output: TextIO, serialisation="fh
 
 @deprecated(deprecated_in="0.4.7", details="Use write_json() instead")
 def write_ontoportal_json(
-    msdf: MappingSetDataFrame, output: TextIO, serialisation: str = "ontoportal_json"
+    msdf: MappingSetDataFrame, output: str | Path | TextIO, serialisation: str = "ontoportal_json"
 ) -> None:
     """Write a mapping set dataframe to the file as the ontoportal mapping JSON model."""
     if serialisation != "ontoportal_json":
@@ -153,7 +177,7 @@
 
 def write_owl(
     msdf: MappingSetDataFrame,
-    file: TextIO,
+    file: str | Path | TextIO,
     serialisation=SSSOM_DEFAULT_RDF_SERIALISATION,
 ) -> None:
     """Write a mapping set dataframe to the file as OWL."""
@@ -166,7 +190,12 @@ def write_owl(
 
     graph = to_owl_graph(msdf)
     t = graph.serialize(format=serialisation, encoding="utf-8")
-    print(t.decode(), file=file)
+
+    if isinstance(file, str | Path):
+        with open(file, "w") as fh:
+            print(t.decode(), file=fh)
+    else:
+        print(t.decode(), file=file)
 
 
 # Converters
diff --git a/tests/test_writers.py b/tests/test_writers.py
index ad27e565..22fab7f5 100644
--- a/tests/test_writers.py
+++ b/tests/test_writers.py
@@ -43,20 +43,18 @@ def setUp(self) -> None:
     def test_write_sssom_dataframe(self):
         """Test writing as a dataframe."""
         tmp_path = os.path.join(test_out_dir, "test_write_sssom_dataframe.tsv")
-        with open(tmp_path, "w") as tmp_file:
-            write_table(self.msdf, tmp_file)
+        write_table(self.msdf, tmp_path)
         msdf = parse_sssom_table(tmp_path)
         self.assertEqual(
             len(msdf.df),
             self.mapping_count,
-            f"{tmp_file} has the wrong number of mappings.",
+            f"{tmp_path} has the wrong number of mappings.",
         )
 
     def test_write_sssom_rdf(self):
         """Test writing as RDF."""
         path_1 = os.path.join(test_out_dir, "test_write_sssom_rdf.rdf")
-        with open(path_1, "w") as file:
-            write_rdf(self.msdf, file)
+        write_rdf(self.msdf, path_1)
         msdf = parse_sssom_rdf(path_1, self.msdf.prefix_map)
         self.assertEqual(
             len(msdf.df),
@@ -66,14 +64,12 @@ def test_write_sssom_rdf(self):
 
         # TODO this test doesn't make sense
         path_2 = os.path.join(test_out_dir, "test_write_sssom_rdf.rdf.tsv")
-        with open(path_2, "w") as file:
-            write_table(self.msdf, file)
+        write_table(self.msdf, path_2)
 
     def test_write_sssom_json(self):
         """Test writing as JSON."""
         path = os.path.join(test_out_dir, "test_write_sssom_json.json")
-        with open(path, "w") as file:
-            write_json(self.msdf, file)
+        write_json(self.msdf, path)
         msdf = parse_sssom_json(path)
         self.assertEqual(
             len(msdf.df),
@@ -136,8 +132,7 @@ def test_write_sssom_fhir(self):
         mapping_set_id: str = metadata["mapping_set_id"]
 
         # Write
-        with open(path, "w") as file:
-            write_json(self.msdf, file, "fhir_json")
+        write_json(self.msdf, path, "fhir_json")
         # Read
         # todo: after implementing reader/importer, change this to `msdf = parse_sssom_fhir_json()`
         with open(path, "r") as file:
@@ -174,14 +169,12 @@ def test_write_sssom_fhir(self):
     def test_write_sssom_owl(self):
         """Test writing as OWL."""
         tmp_file = os.path.join(test_out_dir, "test_write_sssom_owl.owl")
-        with open(tmp_file, "w") as file:
-            write_owl(self.msdf, file)
+        write_owl(self.msdf, tmp_file)
 
     def test_write_sssom_ontoportal_json(self):
         """Test writing as ontoportal JSON."""
         path = os.path.join(test_out_dir, "test_write_sssom_ontoportal_json.json")
-        with open(path, "w") as file:
-            write_json(self.msdf, file, "ontoportal_json")
+        write_json(self.msdf, path, "ontoportal_json")
 
         with open(path, "r") as file:
             d = json.load(file)
From 272f2f113d5863c6bad01f8b05f99c287398743c Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Fri, 23 May 2025 11:04:35 +0200
Subject: [PATCH 04/19] Refactor

---
 src/sssom/writers.py  | 45 +++++++++++++++++++------------------------
 tests/test_writers.py |  2 +-
 2 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/src/sssom/writers.py b/src/sssom/writers.py
index 3e8714e4..64742fd7 100644
--- a/src/sssom/writers.py
+++ b/src/sssom/writers.py
@@ -2,8 +2,9 @@
 
 import json
 import logging as _logging
+from contextlib import contextmanager
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, TextIO, Tuple, Union
+from typing import Any, Callable, Dict, Generator, List, Optional, TextIO, Tuple, Union
 
 import pandas as pd
 import yaml
@@ -47,6 +48,15 @@
 
 MSDFWriter = Callable[[MappingSetDataFrame, TextIO], None]
 
 
+@contextmanager
+def _open_text_writer(xx: str | Path | TextIO) -> Generator[TextIO, None, None]:
+    if isinstance(xx, str | Path):
+        with open(xx, "w") as file:
+            yield file
+    else:
+        yield xx
+
+
 def write_table(
     msdf: MappingSetDataFrame,
     file: str | Path | TextIO,
     embedded_mode: bool = True,
     serialisation: str = "tsv",
     sort: bool = False,
@@ -68,18 +78,14 @@ def write_table(
         lines = [f"# {line}" for line in lines if line != ""]
         s = msdf.df.to_csv(sep=sep, index=False).rstrip("\n")
         lines = lines + [s]
-        if isinstance(file, str | Path):
-            with open(file, "w") as fh:
-                for line in lines:
-                    print(line, file=fh)
-        else:
-            for line in lines:
-                print(line, file=file)
+        with _open_text_writer(file) as fh:
+            for line in lines:
+                print(line, file=fh)
     else:
         if isinstance(file, str | Path):
             yml_filepath = Path(file).with_suffix(".yaml")
         else:
-            yml_filepath = file.name.replace("tsv", "yaml")
+            yml_filepath = Path(file.name.replace("tsv", "yaml"))
 
         # Export MSDF as tsv
         msdf.df.to_csv(file, sep=sep, index=False)
@@ -110,11 +116,8 @@ def write_rdf(
     check_all_prefixes_in_curie_map(msdf)
     graph = to_rdf_graph(msdf=msdf)
     t = graph.serialize(format=serialisation, encoding="utf-8")
-    if isinstance(file, str | Path):
-        with open(file, "w") as fh:
-            print(t.decode(), file=fh)
-    else:
-        print(t.decode(), file=file)
+    with _open_text_writer(file) as fh:
+        print(t.decode(), file=fh)
 
 
 def write_json(
@@ -143,12 +146,8 @@ def write_json(
         )
     func: Callable = func_map[serialisation]
     data = func(msdf)
-
-    if isinstance(output, str | Path):
-        with open(output, "w") as file:
-            json.dump(data, file, indent=2)
-    else:
-        json.dump(data, output, indent=2)
+    with _open_text_writer(output) as fh:
+        json.dump(data, fh, indent=2)
 
 
 @deprecated(deprecated_in="0.4.7", details="Use write_json() instead")
@@ -188,11 +187,8 @@ def write_owl(
     graph = to_owl_graph(msdf)
     t = graph.serialize(format=serialisation, encoding="utf-8")
-
-    if isinstance(file, str | Path):
-        with open(file, "w") as fh:
-            print(t.decode(), file=fh)
-    else:
-        print(t.decode(), file=file)
+    with _open_text_writer(file) as fh:
+        print(t.decode(), file=fh)
 
 
 # Converters
diff --git a/tests/test_writers.py b/tests/test_writers.py
index 22fab7f5..8a6022e0 100644
--- a/tests/test_writers.py
+++ b/tests/test_writers.py
@@ -123,7 +123,7 @@ def test_update_sssom_context_with_prefixmap(self):
         self.assertNotIn("snomed", context["@context"])
         self.assertIn("mapping_set_id", context["@context"])
 
-    def test_write_sssom_fhir(self):
+    def test_write_sssom_fhir(self) -> None:
         """Test writing as FHIR ConceptMap JSON."""
         # Vars
         path = os.path.join(test_out_dir, "test_write_sssom_fhir.json")
From f7b8380b265d093356b88efb853b01f3bff81d19 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Fri, 23 May 2025 11:11:15 +0200
Subject: [PATCH 05/19] Update typing

---
 src/sssom/constants.py |  6 +++++-
 src/sssom/parsers.py   | 18 +++++++++++-------
 src/sssom/writers.py   | 26 ++++++++++++--------------
 3 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/src/sssom/constants.py b/src/sssom/constants.py
index b2800ac3..d407f0ef 100644
--- a/src/sssom/constants.py
+++ b/src/sssom/constants.py
@@ -4,7 +4,7 @@
 import uuid
 from enum import Enum
 from functools import cached_property, lru_cache
-from typing import Any, Dict, List, Literal, Set
+from typing import Any, Dict, List, Literal, Set, TextIO, Union
 
 import importlib_resources
 import yaml
@@ -316,3 +316,7 @@ def get_default_metadata() -> MetadataType:
         "mapping_set_id": generate_mapping_set_id(),
         "license": DEFAULT_LICENSE,
     }
+
+
+#: A hint for functions that can take a path or an IO
+PathOrIO = Union[str, pathlib.Path, TextIO]
diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py
index ae5c1be3..6855a12d 100644
--- a/src/sssom/parsers.py
+++ b/src/sssom/parsers.py
@@ -54,6 +54,7 @@
     SUBJECT_SOURCE,
     SUBJECT_SOURCE_ID,
     MetadataType,
+    PathOrIO,
     _get_sssom_schema_object,
     get_default_metadata,
 )
@@ -76,7 +77,7 @@
 # Parsers (from file)
 
 
-def _open_input(input: Union[str, Path, TextIO]) -> io.StringIO:
+def _open_input(input: PathOrIO) -> io.StringIO:
     """Transform a URL, a filepath (from pathlib), or a string (with file contents) to a StringIO object.
 
     :param input: A string representing a URL, a filepath, or file contents,
         or a Path object representing a filepath.
     :return: A StringIO object containing the input data.
     """
@@ -285,7 +285,7 @@
 def parse_sssom_table(
-    file_path: Union[str, Path, TextIO],
+    file_path: PathOrIO,
     prefix_map: ConverterHint = None,
     meta: Optional[MetadataType] = None,
     *,
     strict: bool = False,
@@ -357,7 +357,7 @@
 def parse_sssom_rdf(
-    file_path: str,
+    file_path: Union[str, Path],
     prefix_map: ConverterHint = None,
     meta: Optional[MetadataType] = None,
     serialisation=SSSOM_DEFAULT_RDF_SERIALISATION,
@@ -395,7 +395,10 @@
 def parse_sssom_json(
-    file_path: str, prefix_map: ConverterHint = None, meta: Optional[MetadataType] = None, **kwargs
+    file_path: Union[str, Path],
+    prefix_map: ConverterHint = None,
+    meta: Optional[MetadataType] = None,
+    **kwargs,
 ) -> MappingSetDataFrame:
     """Parse a TSV to a :class:`MappingSetDocument` to a :class:`MappingSetDataFrame`."""
     raise_for_bad_path(file_path)
@@ -432,7 +435,7 @@
 def parse_obographs_json(
-    file_path: str,
+    file_path: Union[str, Path],
     prefix_map: ConverterHint = None,
     meta: Optional[MetadataType] = None,
     mapping_predicates: Optional[List[str]] = None,
@@ -516,7 +519,7 @@ def _get_mapping_dict(
 
 def parse_alignment_xml(
-    file_path: str,
+    file_path: Union[str, Path],
     prefix_map: ConverterHint = None,
     meta: Optional[MetadataType] = None,
     mapping_predicates: Optional[List[str]] = None,
@@ -526,7 +529,7 @@ def parse_alignment_xml(
     converter, meta = _get_prefix_map_and_metadata(prefix_map=prefix_map, meta=meta)
 
     logging.info("Loading from alignment API")
-    xmldoc = minidom.parse(file_path)
+    xmldoc = minidom.parse(Path(file_path).resolve().as_posix())
     msdf = from_alignment_minidom(
         xmldoc,
         prefix_map=converter,
diff --git a/src/sssom/writers.py b/src/sssom/writers.py
index 64742fd7..2417dacb 100644
--- a/src/sssom/writers.py
+++ b/src/sssom/writers.py
@@ -19,7 +19,7 @@
 
 from sssom.validators import check_all_prefixes_in_curie_map
 
-from .constants import CURIE_MAP, SCHEMA_YAML, SSSOM_URI_PREFIX
+from .constants import CURIE_MAP, SCHEMA_YAML, SSSOM_URI_PREFIX, PathOrIO
 from .context import _load_sssom_context
 from .parsers import to_mapping_set_document
 from .util import (
@@ -49,7 +49,7 @@
 
 @contextmanager
-def _open_text_writer(xx: str | Path | TextIO) -> Generator[TextIO, None, None]:
+def _open_text_writer(xx: PathOrIO) -> Generator[TextIO, None, None]:
     if isinstance(xx, str | Path):
         with open(xx, "w") as file:
             yield file
     else:
@@ -59,7 +59,7 @@
 
 def write_table(
     msdf: MappingSetDataFrame,
-    file: str | Path | TextIO,
+    file: PathOrIO,
     embedded_mode: bool = True,
     serialisation: str = "tsv",
     sort: bool = False,
@@ -93,14 +93,16 @@
             yaml.safe_dump(meta, y)
 
 
-def write_tsv(msdf: MappingSetDataFrame, path: str | Path | TextIO, sort: bool = False) -> None:
+def write_tsv(
+    msdf: MappingSetDataFrame, path: PathOrIO, embedded_mode: bool = True, sort: bool = False
+) -> None:
     """Write a mapping set to a TSV file."""
-    raise NotImplementedError
+    write_table(msdf, path, serialisation="tsv", embedded_mode=embedded_mode, sort=sort)
 
 
 def write_rdf(
     msdf: MappingSetDataFrame,
-    file: str | Path | TextIO,
+    file: PathOrIO,
     serialisation: Optional[str] = None,
 ) -> None:
     """Write a mapping set dataframe to the file as RDF."""
@@ -120,9 +122,7 @@ def write_rdf(
         print(t.decode(), file=fh)
 
 
-def write_json(
-    msdf: MappingSetDataFrame, output: str | Path | TextIO, serialisation="json"
-) -> None:
+def write_json(msdf: MappingSetDataFrame, output: PathOrIO, serialisation="json") -> None:
     """Write a mapping set dataframe to the file as JSON.
 
     :param msdf: A mapping set dataframe
@@ -151,9 +151,7 @@
 
 @deprecated(deprecated_in="0.4.7", details="Use write_json() instead")
-def write_fhir_json(
-    msdf: MappingSetDataFrame, output: str | Path | TextIO, serialisation="fhir_json"
-) -> None:
+def write_fhir_json(msdf: MappingSetDataFrame, output: PathOrIO, serialisation="fhir_json") -> None:
     """Write a mapping set dataframe to the file as FHIR ConceptMap JSON."""
     if serialisation != "fhir_json":
         raise ValueError(
@@ -164,7 +162,7 @@
 
 @deprecated(deprecated_in="0.4.7", details="Use write_json() instead")
 def write_ontoportal_json(
-    msdf: MappingSetDataFrame, output: str | Path | TextIO, serialisation: str = "ontoportal_json"
+    msdf: MappingSetDataFrame, output: PathOrIO, serialisation: str = "ontoportal_json"
 ) -> None:
     """Write a mapping set dataframe to the file as the ontoportal mapping JSON model."""
     if serialisation != "ontoportal_json":
@@ -176,7 +174,7 @@
 
 def write_owl(
     msdf: MappingSetDataFrame,
-    file: str | Path | TextIO,
+    file: PathOrIO,
     serialisation=SSSOM_DEFAULT_RDF_SERIALISATION,
 ) -> None:
     """Write a mapping set dataframe to the file as OWL."""
From 5f81d40c6fb8b307b73abbfac6b4ad307af3f482 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Fri, 23 May 2025 11:13:29 +0200
Subject: [PATCH 06/19] Update writers.py

---
 src/sssom/writers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/sssom/writers.py b/src/sssom/writers.py
index 2417dacb..f1a453ad 100644
--- a/src/sssom/writers.py
+++ b/src/sssom/writers.py
@@ -50,7 +50,7 @@
 
 @contextmanager
 def _open_text_writer(xx: PathOrIO) -> Generator[TextIO, None, None]:
-    if isinstance(xx, str | Path):
+    if isinstance(xx, (str, Path)):
         with open(xx, "w") as file:
             yield file
     else:
@@ -82,7 +82,7 @@ def write_table(
         for line in lines:
             print(line, file=fh)
     else:
-        if isinstance(file, str | Path):
+        if isinstance(file, (str, Path)):
             yml_filepath = Path(file).with_suffix(".yaml")
         else:
             yml_filepath = Path(file.name.replace("tsv", "yaml"))
From 21266ff66f540e79ebcceefe54d3b795172a8248 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Fri, 23 May 2025 11:17:41 +0200
Subject: [PATCH 07/19] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 93837377..859c67f5 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,7 @@ url = "https://raw.githubusercontent.com/mapping-commons/mh_mapping_initiative/m
 msdf = sssom.parse_tsv(url)
 
 # SSSOM comes with several "write" functions
+sssom.write_tsv(msdf, "test.tsv")
 sssom.write_json(msdf, "test.json")
 sssom.write_owl(msdf, "test.owl")
 sssom.write_rdf(msdf, "test.ttl")
From 72f6ab6bb0aa579d07f5689a146db0e4edfa16b0 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Fri, 23 May 2025 11:19:51 +0200
Subject: [PATCH 08/19] Update parsers.py

---
 src/sssom/parsers.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py
index 6855a12d..daf7be9f 100644
--- a/src/sssom/parsers.py
+++ b/src/sssom/parsers.py
@@ -290,6 +290,7 @@ def parse_sssom_table(
     meta: Optional[MetadataType] = None,
     *,
     strict: bool = False,
+    **kwargs: Any,
 ) -> MappingSetDataFrame:
     """Parse a SSSOM TSV.
@@ -302,8 +303,12 @@ def parse_sssom_table(
         contained within the document itself. For example, this may come from a
         companion SSSOM YAML file.
     :param strict:
         If true, will fail parsing for undefined prefixes, CURIEs, or IRIs
+    :param kwargs:
+        Additional keyword arguments (unhandled)
     :returns:
         A parsed dataframe wrapper object
     """
+    if kwargs:
+        logging.warning("unhandled keyword arguments passed: %s", kwargs)
     if isinstance(file_path, Path) or isinstance(file_path, str):
         raise_for_bad_path(file_path)
     stream: io.StringIO = _open_input(file_path)
From 5d936d7ba01970e6b1cdb9fc081ce738752017bf Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Sat, 24 May 2025 20:08:22 +0200
Subject: [PATCH 09/19] Add explicit CSV and TSV parsers

---
 src/sssom/__init__.py |  2 +-
 src/sssom/parsers.py  | 59 +++++++++++++++++++++++++------------------
 src/sssom/util.py     | 33 +++++++++++-------------
 src/sssom/writers.py  |  2 +-
 tests/test_utils.py   | 24 ++++++++++++++++++
 5 files changed, 75 insertions(+), 45 deletions(-)

diff --git a/src/sssom/__init__.py b/src/sssom/__init__.py
index ec1d032f..0fdbe39b 100644
--- a/src/sssom/__init__.py
+++ b/src/sssom/__init__.py
@@ -23,5 +23,5 @@
 )
 
 from .constants import generate_mapping_set_id, get_default_metadata  # noqa:401
-from .parsers import parse_tsv  # noqa:401
+from .parsers import parse_csv, parse_sssom_table, parse_tsv  # noqa:401
 from .writers import write_json, write_owl, write_rdf, write_tsv  # noqa:401
diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py
index daf7be9f..bf67439d 100644
--- a/src/sssom/parsers.py
+++ b/src/sssom/parsers.py
@@ -144,7 +144,7 @@
     return table_component, metadata_component
 
 
-def _read_pandas_and_metadata(input: io.StringIO, sep: str = None):
+def _read_pandas_and_metadata(input: io.StringIO, sep: str | None = None):
     """Read a tabular data file by wrapping func:`pd.read_csv` to handles comment lines correctly.
 
     :param input: The file to read. If no separator is given, this file should be named.
     :param sep: File separator for pandas
     :return: A pandas dataframe
@@ -155,7 +155,6 @@ def _read_pandas_and_metadata(input: io.StringIO, sep: str = None):
 
     try:
         df = pd.read_csv(table_stream, sep=sep, dtype=str, engine="python")
-        df.fillna("", inplace=True)
     except EmptyDataError as e:
         logging.warning(f"Seems like the dataframe is empty: {e}")
         df = pd.DataFrame(
@@ -167,28 +166,24 @@ def _read_pandas_and_metadata(input: io.StringIO, sep: str = None):
                 MAPPING_JUSTIFICATION,
             ]
         )
+    else:
+        df.fillna("", inplace=True)
 
-    if isinstance(df, pd.DataFrame):
-        sssom_metadata = _read_metadata_from_table(metadata_stream)
-        return df, sssom_metadata
-
-    return None, None
+    sssom_metadata = _read_metadata_from_table(metadata_stream)
+    return df, sssom_metadata
 
 
-def _get_seperator_symbol_from_file_path(file):
-    r"""
-    Take as an input a filepath and return the seperate symbol used, for example, by pandas.
-
-    :param file: the file path
-    :return: the seperator symbols as a string, e.g. '\t'
-    """
-    if isinstance(file, Path) or isinstance(file, str):
-        extension = get_file_extension(file)
-        if extension == "tsv":
-            return "\t"
-        elif extension == "csv":
-            return ","
-        logging.warning(f"Could not guess file extension for {file}")
-    return None
+def _infer_separator(file: PathOrIO) -> str | None:
+    r"""Infer the CSV separator from a file path or IO object.
+
+    :param file: the file path
+    :return: the separator symbols as a string, e.g. '\t'
+    """
+    extension = get_file_extension(file)
+    if extension == "tsv":
+        return "\t"
+    elif extension == "csv":
+        return ","
+    return None
@@ -285,8 +285,9 @@ def parse_sssom_table(
     meta: Optional[MetadataType] = None,
     *,
     strict: bool = False,
+    sep: str | None = None,
     **kwargs: Any,
 ) -> MappingSetDataFrame:
-    """Parse a SSSOM TSV.
+    """Parse a SSSOM CSV or TSV file.
 
     :param file_path:
         A file path, URL, or I/O object that contains SSSOM encoded in TSV
@@ -301,6 +302,8 @@ def parse_sssom_table(
     :param strict:
         If true, will fail parsing for undefined prefixes, CURIEs, or IRIs
+    :param sep:
+        The separator. If not given, inferred from file name
     :param kwargs:
         Additional keyword arguments (unhandled)
     :returns:
         A parsed dataframe wrapper object
@@ -313,10 +316,10 @@ def parse_sssom_table(
     if isinstance(file_path, Path) or isinstance(file_path, str):
         raise_for_bad_path(file_path)
     stream: io.StringIO = _open_input(file_path)
-    sep_new = _get_seperator_symbol_from_file_path(file_path)
-    df, sssom_metadata = _read_pandas_and_metadata(stream, sep_new)
+    if sep is None:
+        sep = _infer_separator(file_path)
+    df, sssom_metadata = _read_pandas_and_metadata(stream, sep)
 
     if meta is None:
         meta = {}
@@ -355,7 +358,16 @@
     return msdf
 
 
-parse_tsv = parse_sssom_table
+def parse_csv(*args, **kwargs) -> MappingSetDataFrame:
+    """Parse a SSSOM CSV file, forwarding arguments to :func:`parse_sssom_table`."""
+    kwargs["sep"] = ","
+    return parse_sssom_table(*args, **kwargs)
+
+
+def parse_tsv(*args, **kwargs) -> MappingSetDataFrame:
+    """Parse a SSSOM TSV file, forwarding arguments to :func:`parse_sssom_table`."""
+    kwargs["sep"] = "\t"
+    return parse_sssom_table(*args, **kwargs)
 
 
 def parse_sssom_rdf(
@@ -836,6 +848,7 @@
 PARSING_FUNCTIONS: typing.Mapping[str, Callable] = {
+    "csv": parse_sssom_table,
     "tsv": parse_sssom_table,
     "obographs-json": parse_obographs_json,
     "alignment-api-xml": parse_alignment_xml,
@@ -849,14 +862,14 @@ def get_parsing_function(input_format: Optional[str], filename: str) -> Callable
 
     :param input_format: File format
     :param filename: Filename
-    :raises Exception: Unknown file format
+    :raises ValueError: Unknown file format
     :return: Appropriate 'read' function
     """
     if input_format is None:
-        input_format = get_file_extension(filename)
+        input_format = get_file_extension(filename) or "tsv"
     func = PARSING_FUNCTIONS.get(input_format)
     if func is None:
-        raise Exception(f"Unknown input format: {input_format}")
+        raise ValueError(f"Unknown input format: {input_format}")
     return func
diff --git a/src/sssom/util.py b/src/sssom/util.py
index 7c5dee58..f502c8df 100644
--- a/src/sssom/util.py
+++ b/src/sssom/util.py
@@ -58,6 +58,7 @@
     SUBJECT_SOURCE,
     UNKNOWN_IRI,
     MetadataType,
+    PathOrIO,
     _get_sssom_schema_object,
     get_default_metadata,
 )
@@ -993,29 +994,25 @@ def inject_metadata_into_df(msdf: MappingSetDataFrame) -> MappingSetDataFrame:
     return msdf
 
 
-def get_file_extension(file: Union[str, Path, TextIO]) -> str:
+def get_file_extension(file: PathOrIO) -> str | None:
     """Get file extension.
     :param file: File path
     :return: format of the file passed, default tsv
     """
-    if isinstance(file, Path):
-        if file.suffix:
-            return file.suffix.strip(punctuation)
-        else:
-            logging.warning(
-                f"Cannot guess format from {file}, despite appearing to be a Path-like object."
-            )
-    elif isinstance(file, str):
-        filename = file
-        parts = filename.split(".")
-        if len(parts) > 0:
-            f_format = parts[-1]
-            return f_format.strip(punctuation)
-        else:
-            logging.warning(f"Cannot guess format from {filename}")
-    logging.info("Cannot guess format extension for this file, assuming TSV.")
-    return "tsv"
+    if isinstance(file, str):
+        file = Path(file)
+    elif isinstance(file, TextIO):
+        file = Path(file.name)
+
+    filename = file.name.removesuffix(".gz")
+    if filename.endswith(".tsv"):
+        return "tsv"
+    elif filename.endswith(".csv"):
+        return "csv"
+    else:
+        logging.debug("cannot guess format for %s", filename)
+        return None
diff --git a/src/sssom/writers.py b/src/sssom/writers.py
index f1a453ad..a6a10b9a 100644
--- a/src/sssom/writers.py
+++ b/src/sssom/writers.py
@@ -572,7 +572,7 @@ def get_writer_function(
     :return: Type of writer function
     """
     if output_format is None:
-        output_format = get_file_extension(output)
+        output_format = get_file_extension(output) or "tsv"
     if output_format not in WRITER_FUNCTIONS:
         raise ValueError(f"Unknown output format: {output_format}")
     func, tag = WRITER_FUNCTIONS[output_format]
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 42499e69..fac96b63 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,6 +1,8 @@
 """Test for merging MappingSetDataFrames."""
 
+import tempfile
 import unittest
+from pathlib import Path
 
 import numpy as np
 import pandas as pd
@@ -26,13 +28,16 @@
     filter_out_prefixes,
     filter_prefixes,
     get_dict_from_mapping,
+    get_file_extension,
     get_prefixes_used_in_table,
     inject_metadata_into_df,
     invert_mappings,
 )
 from tests.constants import data_dir
 
+HERE = Path(__file__).parent.resolve()
+
 
 class TestIO(unittest.TestCase):
     """A test case for merging msdfs."""
@@ -498,3 +503,22 @@
         )
 
         # self.assertIn("SCTID", converter.prefix_map)
+
+    def test_get_file_extension(self) -> None:
+        """Test getting a file extension."""
+        for value, part in [
+            ("tsv", "test.tsv"),
+            ("tsv", "test.tsv.gz"),
+            ("csv", "test.csv"),
+            ("csv", "test.csv.gz"),
+            # Don't infer an extension for something else
+            (None, "test.xxx"),
+        ]:
+            path = HERE.joinpath(part)
+            with self.subTest(path=path, mode="path"):
+                self.assertEqual(value, get_file_extension(path))
+            with self.subTest(path=path, mode="str"):
+                self.assertEqual(value, get_file_extension(path.as_posix()))
+            with self.subTest(path=path, mode="file"), tempfile.TemporaryDirectory() as d:
+                with Path(d).joinpath(part).open("w") as file:
+                    self.assertEqual(value, get_file_extension(file))
From 065ba1fa1104acdb99c45b32676ee9697ba71f46 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Sat, 24 May 2025 20:13:43 +0200
Subject: [PATCH 10/19] Get safer with more typing

---
 src/sssom/parsers.py | 12 +++++++-----
 src/sssom/util.py    |  7 +++++--
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py
index bf67439d..e87ae527 100644
--- a/src/sssom/parsers.py
+++ b/src/sssom/parsers.py
@@ -63,6 +63,7 @@
 from .sssom_document import MappingSetDocument
 from .util import (
     SSSOM_DEFAULT_RDF_SERIALISATION,
+    ExtensionLiteral,
     MappingSetDataFrame,
     get_file_extension,
     is_multivalued_slot,
@@ -173,6 +174,9 @@
     return df, sssom_metadata
 
 
+EXTENSION_TO_SEP: dict[ExtensionLiteral, str] = {"tsv": "\t", "csv": ","}
+
+
 def _infer_separator(file: PathOrIO) -> str | None:
     r"""Infer the CSV separator from a file path or IO object.
 
     :param file: the file path
     :return: the separator symbols as a string, e.g. '\t'
     """
     extension = get_file_extension(file)
-    if extension == "tsv":
-        return "\t"
-    elif extension == "csv":
-        return ","
-    return None
+    if extension is None:
+        return None
+    return EXTENSION_TO_SEP[extension]
diff --git a/src/sssom/util.py b/src/sssom/util.py
index f502c8df..8458be11 100644
--- a/src/sssom/util.py
+++ b/src/sssom/util.py
@@ -10,7 +10,7 @@
 from functools import partial, reduce
 from pathlib import Path
 from string import punctuation
-from typing import Any, DefaultDict, Dict, List, Optional, Set, TextIO, Tuple, Union
+from typing import Any, DefaultDict, Dict, List, Literal, Optional, Set, TextIO, Tuple, Union
 
 import curies
 import numpy as np
@@ -994,7 +994,10 @@ def inject_metadata_into_df(msdf: MappingSetDataFrame) -> MappingSetDataFrame:
     return msdf
 
 
-def get_file_extension(file: PathOrIO) -> str | None:
+ExtensionLiteral = Literal["tsv", "csv"]
+
+
+def get_file_extension(file: PathOrIO) -> ExtensionLiteral | None:
     """Get file extension.
 
     :param file: File path
     :return: format of the file passed, default tsv
     """
From faf61db6ac6177ca025e8d46791c64696d7624be Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Sat, 24 May 2025 20:15:52 +0200
Subject: [PATCH 11/19] Update test_collapse.py

---
 tests/test_collapse.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/tests/test_collapse.py b/tests/test_collapse.py
index 9cde66d2..61e31b03 100644
--- a/tests/test_collapse.py
+++ b/tests/test_collapse.py
@@ -1,8 +1,6 @@
 """Test various grouping functionalities."""
 
 import unittest
-from pathlib import Path
-from typing import Union
 
 import pandas as pd
 
 from sssom.parsers import parse_sssom_table
@@ -18,17 +16,12 @@
 from tests.constants import data_dir
 
 
-def parse(filename: Union[str, Path]) -> pd.DataFrame:
-    """Parse a TSV to a pandas frame."""
-    return pd.read_csv(filename, sep="\t", comment="#")
-
-
 class TestCollapse(unittest.TestCase):
     """Test various grouping functionalities."""
 
     def setUp(self) -> None:
         """Set up the test case."""
-        self.df = parse(data_dir / "basic.tsv")
+        self.df = pd.read_csv(data_dir / "basic.tsv", sep="\t", comment="#")
 
     def test_row_count(self):
         """Test the dataframe has the correct number of rows."""
@@ -71,7 +64,7 @@ def test_diff(self):
         self.assertTrue(c.startswith("COMMON_TO_BOTH"))
         # output = sqldf("select * from diff_df where comment != ''")
 
-        df2 = parse(data_dir / "basic2.tsv")
+        df2 = pd.read_csv(data_dir / "basic2.tsv", sep="\t", comment="#")
         diff = compare_dataframes(self.df, df2)
         self.assertEqual(15, len(diff.unique_tuples1))
         self.assertEqual(3, len(diff.unique_tuples2))
From af6a7cb94d45e66c77b229ed335c0b07e4c476d5 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Sat, 24 May 2025 20:16:14 +0200
Subject: [PATCH 12/19] Update util.py

---
 src/sssom/util.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sssom/util.py b/src/sssom/util.py
index 8458be11..8baedfe7 100644
--- a/src/sssom/util.py
+++ b/src/sssom/util.py
@@ -997,7 +997,7 @@
 ExtensionLiteral = Literal["tsv", "csv"]
 
 
-def get_file_extension(file: PathOrIO) -> ExtensionLiteral | None:
+def get_file_extension(file: PathOrIO) -> Optional[ExtensionLiteral]:
     """Get file extension.
 
     :param file: File path
     :return: format of the file passed, default tsv
     """
From 8065dfe1f589d21073caba030b0ec4ed0dcba780 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Sat, 24 May 2025 20:16:40 +0200
Subject: [PATCH 13/19] Update parsers.py

---
 src/sssom/parsers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py
index e87ae527..dd2779e8 100644
--- a/src/sssom/parsers.py
+++ b/src/sssom/parsers.py
@@ -177,7 +177,7 @@
 EXTENSION_TO_SEP: dict[ExtensionLiteral, str] = {"tsv": "\t", "csv": ","}
 
 
-def _infer_separator(file: PathOrIO) -> str | None:
+def _infer_separator(file: PathOrIO) -> Optional[str]:
     r"""Infer the CSV separator from a file path or IO object.
 
     :param file: the file path
     :return: the separator symbols as a string, e.g. '\t'
     """
From 7fee8d210b03bcb7ed4471b960e6abe1dcf1471e Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Sat, 24 May 2025 20:20:16 +0200
Subject: [PATCH 14/19] Update util.py

---
 src/sssom/util.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/sssom/util.py b/src/sssom/util.py
index 8baedfe7..64691014 100644
--- a/src/sssom/util.py
+++ b/src/sssom/util.py
@@ -9,7 +9,6 @@
 from dataclasses import dataclass, field
 from functools import partial, reduce
 from pathlib import Path
-from string import punctuation
 from typing import Any, DefaultDict, Dict, List, Literal, Optional, Set, TextIO, Tuple, Union
 
 import curies
From eb415893fe36a8e23e4790733ffd3d97c119f Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Sat, 24 May 2025 20:29:05 +0200
Subject: [PATCH 15/19] Update parsers.py

---
 src/sssom/parsers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py
index dd2779e8..0e90e90b 100644
--- a/src/sssom/parsers.py
+++ b/src/sssom/parsers.py
@@ -145,7 +145,7 @@
     return table_component, metadata_component
 
 
-def _read_pandas_and_metadata(input: io.StringIO, sep: str | None = None):
+def _read_pandas_and_metadata(input: io.StringIO, sep: Optional[str] = None):
     """Read a tabular data file by wrapping func:`pd.read_csv` to handles comment lines correctly.
 
     :param input: The file to read. If no separator is given, this file should be named.
     :param sep: File separator for pandas
     :return: A pandas dataframe
     """
@@ -287,7 +287,7 @@ def parse_sssom_table(
     meta: Optional[MetadataType] = None,
     *,
     strict: bool = False,
-    sep: str | None = None,
+    sep: Optional[str] = None,
     **kwargs: Any,
 ) -> MappingSetDataFrame:
     """Parse a SSSOM CSV or TSV file.
From 521e94687d8caf27b635854a3d73051b6ecb671e Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Sat, 24 May 2025 20:55:44 +0200
Subject: [PATCH 16/19] Refactor file opening

---
 src/sssom/parsers.py  | 77 +++++++++++++++++++++++--------------------
 src/sssom/util.py     | 11 ++++---
 tests/test_parsers.py |  8 ++---
 3 files changed, 51 insertions(+), 45 deletions(-)

diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py
index 0e90e90b..6a44a0dc 100644
--- a/src/sssom/parsers.py
+++ b/src/sssom/parsers.py
@@ -5,6 +5,7 @@
 import itertools as itt
 import json
 import logging as _logging
+import os.path
 import re
 import typing
 from collections import ChainMap, Counter
@@ -74,46 +75,51 @@
 logging = _logging.getLogger(__name__)
 
+
 # * *******************************************************
 # Parsers (from file)
 
 
-def _open_input(input: PathOrIO) -> io.StringIO:
+def _open_input(p: PathOrIO) -> TextIO:
     """Transform a URL, a filepath (from pathlib), or a string (with file contents) to a StringIO object.
 
-    :param input: A string representing a URL, a filepath, or file contents,
-        or a Path object representing a filepath.
+    :param p:
+        A string representing a URL, a filepath, or file contents, or a Path object representing a filepath.
     :return: A StringIO object containing the input data.
     """
-    # If the import already is a StrinIO, return it
-    if isinstance(input, io.StringIO):
-        return input
-    elif isinstance(input, Path):
-        input = str(input)
-
-    if isinstance(input, str):
-        if input.startswith("http://") or input.startswith("https://"):
-            # It's a URL
-            data = requests.get(input, timeout=30).content
-            return io.StringIO(data.decode("utf-8"))
-        elif "\n" in input or "\r" in input:
-            # It's string data
-            return io.StringIO(input)
-        elif input.endswith(".gz"):
-            with gzip.open(input, "rt") as file:
-                file_content = file.read()
-            return io.StringIO(file_content)
-        else:
-            # It's a local file path
-            with open(input, "r") as file:
-                file_content = file.read()
-            return io.StringIO(file_content)
-
-    raise IOError(f"Could not determine the type of input {input}")
+    # if we passed an IO object, return it back directly
+    if not isinstance(p, (str, Path)):
+        return p
+
+    if isinstance(p, str) and (p.startswith("http://") or p.startswith("https://")):
+        # It's a URL
+        data = requests.get(p, timeout=30).content
+        return io.StringIO(data.decode("utf-8"))
+
+    # squash a path to a string so we don't have to duplicate logic below
+    if isinstance(p, Path):
+        p = p.as_posix()
+
+    if "\n" in p or "\r" in p:
+        # It's string data
+        return io.StringIO(p)
+
+    if not os.path.exists(p):
+        raise FileNotFoundError(f"file does not exist: {p}")
+
+    if p.endswith(".gz"):
+        with gzip.open(p, "rt") as file:
+            file_content = file.read()
+        return io.StringIO(file_content)
+    else:
+        # It's a local file path
+        with open(p, "r") as file:
+            file_content = file.read()
+        return io.StringIO(file_content)
 
 
-def _separate_metadata_and_table_from_stream(s: io.StringIO):
-    s.seek(0)
+def _separate_metadata_and_table_from_stream(stream: TextIO):
+    stream.seek(0)
 
     # Create a new StringIO object for filtered data
     table_component = io.StringIO()
@@ -122,7 +128,7 @@ def _separate_metadata_and_table_from_stream(s: io.StringIO):
     header_section = True
 
     # Filter out lines starting with '#'
-    for line in s:
+    for line in stream:
         if not line.startswith("#"):
             table_component.write(line)
             if header_section:
@@ -145,14 +151,14 @@ def _separate_metadata_and_table_from_stream(s: io.StringIO):
     return table_component, metadata_component
 
 
-def _read_pandas_and_metadata(input: io.StringIO, sep: Optional[str] = None):
+def _read_pandas_and_metadata(stream: TextIO, sep: Optional[str] = None):
     """Read a tabular data file by wrapping func:`pd.read_csv` to handles comment lines correctly.
 
-    :param input: The file to read. If no separator is given, this file should be named.
+    :param stream: The file to read. If no separator is given, this file should be named.
     :param sep: File separator for pandas
     :return: A pandas dataframe
     """
-    table_stream, metadata_stream = _separate_metadata_and_table_from_stream(input)
+    table_stream, metadata_stream = _separate_metadata_and_table_from_stream(stream)
 
     try:
         df = pd.read_csv(table_stream, sep=sep, dtype=str, engine="python")
@@ -316,7 +322,7 @@ def parse_sssom_table(
         logging.warning("unhandled keyword arguments passed: %s", kwargs)
     if isinstance(file_path, Path) or isinstance(file_path, str):
         raise_for_bad_path(file_path)
-    stream: io.StringIO = _open_input(file_path)
+    stream = _open_input(file_path)
     if sep is None:
         sep = _infer_separator(file_path)
     df, sssom_metadata = _read_pandas_and_metadata(stream, sep)
diff --git a/src/sssom/util.py b/src/sssom/util.py
index 64691014..7221d300 100644
--- a/src/sssom/util.py
+++ b/src/sssom/util.py
@@ -9,7 +9,7 @@
 from dataclasses import dataclass, field
 from functools import partial, reduce
 from pathlib import Path
-from typing import Any, DefaultDict, Dict, List, Literal, Optional, Set, TextIO, Tuple, Union
+from typing import Any, DefaultDict, Dict, List, Literal, Optional, Set, Tuple, Union
 
 import curies
@@ -1002,10 +1002,13 @@ def get_file_extension(file: PathOrIO) -> Optional[ExtensionLiteral]:
     :param file: File path
     :return: format of the file passed, default tsv
     """
-    if isinstance(file, str):
-        file = Path(file)
-    elif isinstance(file, TextIO):
+    if not isinstance(file, (str, Path)):
+        if not hasattr(file, "name"):
+            logging.debug("cannot guess format for object without name: %s", file)
+            return None
         file = Path(file.name)
+    elif isinstance(file, str):
+        file = Path(file)
 
     filename = file.name.removesuffix(".gz")
     if filename.endswith(".tsv"):
         return "tsv"
diff --git a/tests/test_parsers.py b/tests/test_parsers.py
index 3bb4605c..ce6c20d0 100644
--- a/tests/test_parsers.py
+++ b/tests/test_parsers.py
@@ -88,11 +88,9 @@ def test_parse_sssom_dataframe_from_file(self):
 
     def test_parse_sssom_dataframe_from_stringio(self):
         """Test parsing a TSV."""
-        input_path = f"{test_data_dir}/basic.tsv"
-        with open(input_path, "r") as file:
-            input_string = file.read()
-        stream = io.StringIO(input_string)
-        msdf = parse_sssom_table(stream)
+        input_path = test_data_dir.joinpath("basic.tsv")
+        with input_path.open() as file:
+            msdf = parse_sssom_table(file)
         output_path = os.path.join(test_out_dir, "test_parse_sssom_dataframe_stream.tsv")
         with open(output_path, "w") as file:
             write_table(msdf, file)
From 5b233efbe259a11bc7f2a1cfa610bdee2bd9fde1 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Sat, 24 May 2025 21:05:49 +0200
Subject: [PATCH 17/19] Cleanup interspersed logic

---
 src/sssom/parsers.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py
index 6a44a0dc..3e48af36 100644
--- a/src/sssom/parsers.py
+++ b/src/sssom/parsers.py
@@ -94,6 +94,7 @@ def _open_input(p: PathOrIO) -> TextIO:
     if isinstance(p, str) and (p.startswith("http://") or p.startswith("https://")):
         # It's a URL
         data = requests.get(p, timeout=30).content
+        # TODO handle gzipped remote content
         return io.StringIO(data.decode("utf-8"))
 
     # squash a path to a string so we don't have to duplicate logic below
@@ -151,13 +152,20 @@ def _separate_metadata_and_table_from_stream(stream: TextIO):
     return table_component, metadata_component
 
 
-def _read_pandas_and_metadata(stream: TextIO, sep: Optional[str] = None):
+def _read_pandas_and_metadata(file_path: PathOrIO, sep: Optional[str] = None):
     """Read a tabular data file by wrapping func:`pd.read_csv` to handles comment lines correctly.
 
-    :param stream: The file to read. If no separator is given, this file should be named.
+    :param file_path: The file path or stream to read
     :param sep: File separator for pandas
     :return: A pandas dataframe
     """
+    if sep is None:
+        sep = _infer_separator(file_path)
+
+    if isinstance(file_path, (str, Path)):
+        raise_for_bad_path(file_path)
+
+    stream = _open_input(file_path)
     table_stream, metadata_stream = _separate_metadata_and_table_from_stream(stream)
 
     try:
@@ -219,7 +227,6 @@ def _is_irregular_metadata(metadata_list: List[Dict]):
 
 def _check_redefined_builtin_prefixes(sssom_metadata, meta, prefix_map):
-
     # There are three ways in which prefixes can be communicated, so we will check all of them
     # This is a bit overly draconian, as in the end, only the highest priority one gets picked
     # But since this only constitues a (logging) warning, I think its worth reporting
@@ -317,12 +325,8 @@ def parse_sssom_table(
     if kwargs:
         logging.warning("unhandled keyword arguments passed: %s", kwargs)
-    if isinstance(file_path, Path) or isinstance(file_path, str):
-        raise_for_bad_path(file_path)
-    stream = _open_input(file_path)
-    if sep is None:
-        sep = _infer_separator(file_path)
-    df, sssom_metadata = _read_pandas_and_metadata(stream, sep)
+
+    df, sssom_metadata = _read_pandas_and_metadata(file_path, sep)
 
     if meta is None:
         meta = {}
From aabdb93dce39ed0d018b1989133490ff4ee32c57 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Sun, 15 Jun 2025 01:51:34 +0200
Subject: [PATCH 18/19] Update test_parsers.py

---
 tests/test_parsers.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test_parsers.py b/tests/test_parsers.py
index ce6c20d0..a387e5c2 100644
--- a/tests/test_parsers.py
+++ b/tests/test_parsers.py
@@ -90,7 +90,9 @@ def test_parse_sssom_dataframe_from_stringio(self):
         """Test parsing a TSV."""
         input_path = test_data_dir.joinpath("basic.tsv")
         with input_path.open() as file:
-            msdf = parse_sssom_table(file)
+            input_string = file.read()
+            stream = io.StringIO(input_string)
+            msdf = parse_sssom_table(stream)
         output_path = os.path.join(test_out_dir, "test_parse_sssom_dataframe_stream.tsv")
         with open(output_path, "w") as file:
             write_table(msdf, file)
From dd9a8b744b370c16e91e8253816b34f22ae8b955 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Sun, 15 Jun 2025 01:56:21 +0200
Subject: [PATCH 19/19] Update parsers.py

---
 src/sssom/parsers.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py
index 3e48af36..94d65b81 100644
--- a/src/sssom/parsers.py
+++ b/src/sssom/parsers.py
@@ -700,18 +700,18 @@ def from_alignment_minidom(
                 _add_valid_mapping_to_list(mdict, mlist, flip_superclass_assertions=True)
 
         elif node_name == "xml":
-            if e.firstChild.nodeValue != "yes":
+            if e.firstChild.nodeValue != "yes":  # type:ignore[union-attr]
                 raise ValueError(
                     "Alignment format: xml element said, but not set to yes. Only XML is supported!"
                 )
         elif node_name == "onto1":
-            ms[SUBJECT_SOURCE_ID] = e.firstChild.nodeValue
+            ms[SUBJECT_SOURCE_ID] = e.firstChild.nodeValue  # type:ignore[union-attr]
         elif node_name == "onto2":
-            ms[OBJECT_SOURCE_ID] = e.firstChild.nodeValue
+            ms[OBJECT_SOURCE_ID] = e.firstChild.nodeValue  # type:ignore[union-attr]
         elif node_name == "uri1":
-            ms[SUBJECT_SOURCE] = e.firstChild.nodeValue
+            ms[SUBJECT_SOURCE] = e.firstChild.nodeValue  # type:ignore[union-attr]
         elif node_name == "uri2":
-            ms[OBJECT_SOURCE] = e.firstChild.nodeValue
+            ms[OBJECT_SOURCE] = e.firstChild.nodeValue  # type:ignore[union-attr]
 
     ms.mappings = mlist  # type: ignore
     mapping_set_document = MappingSetDocument(mapping_set=ms, converter=converter)
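
---

A minimal end-to-end sketch of the public API as it stands after this series. It uses only functions exported by the patches above (`parse_tsv` from PATCH 01/09, `write_tsv` from PATCH 02-05, `write_json` and `write_rdf` from PATCH 01); the input and output file names (`basic.tsv`, `out.*`) are illustrative placeholders, not files shipped with the repository:

```python
import sssom

# parse_tsv() forwards to parse_sssom_table() with sep="\t" (PATCH 09);
# strict=True fails parsing on undefined prefixes, CURIEs, or IRIs (PATCH 01)
msdf = sssom.parse_tsv("basic.tsv", strict=True)

# writers now accept a path (str or pathlib.Path) in addition to an open
# text IO object, via the _open_text_writer() helper (PATCHES 03-05)
sssom.write_tsv(msdf, "out.tsv")
sssom.write_json(msdf, "out.json")
sssom.write_owl(msdf, "out.owl")

# passing an open file object still works as before
with open("out.ttl", "w") as file:
    sssom.write_rdf(msdf, file)
```

Note that `write_tsv` writes the metadata block inline by default (`embedded_mode=True`); passing `embedded_mode=False` makes `write_table` emit a companion YAML file next to the TSV instead.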