15
15
import pathml .core
16
16
import pathml .preprocessing .pipeline
17
17
from pathml .core .slide_types import SlideType
18
- from torch .utils .data import Dataset
19
18
20
19
21
def infer_backend(path):
    """
    Infer which backend should be used to read a file from its extension.

    The end of ``path`` is compared against the extension sets defined in this
    file (pathml/core/slide_data.py), in a fixed priority order: ``pathmlext``,
    ``bioformatsext``, ``openslideext``, ``dicomext``. The first set containing
    a matching extension wins, so for file formats which are supported by both
    openslide and bioformats this returns "bioformats".

    Matching is case-insensitive, so ``slide.SVS`` resolves the same way as
    ``slide.svs``. Multi-part extensions are handled because the whole tail of
    the path is compared, not only the text after the last dot.

    Args:
        path: path to file. Converted with ``str()``, so path-like objects
            are accepted as well as strings.

    Returns:
        str: one of "bioformats", "openslide", "dicom", "h5path"

    Raises:
        ValueError: if ``path`` does not end with any supported extension
    """
    path = str(path)
    # Compare on a lowercased copy; the untouched path is kept for the error message.
    lowered = path.lower()
    # Order matters: earlier entries take priority when an extension is in several sets.
    candidates = (
        ("h5path", pathmlext),
        ("bioformats", bioformatsext),
        ("openslide", openslideext),
        ("dicom", dicomext),
    )
    for name, extension_set in candidates:
        # Empty entries are dropped because every string ends with "", which
        # would otherwise make the set match any path.
        suffixes = tuple(ext.lower() for ext in extension_set if ext)
        if lowered.endswith(suffixes):
            return name
    raise ValueError(f"input path {path} doesn't match any supported file extensions")
45
41
46
42
47
43
class SlideData :
@@ -55,8 +51,11 @@ class SlideData:
55
51
tiles (pathml.core.Tiles, optional): object containing {coordinates, tile} pairs
56
52
labels (collections.OrderedDict, optional): dictionary containing {key, label} pairs
57
53
backend (str, optional): backend to use for interfacing with slide on disk.
58
- Must be one of {"OpenSlide", "BioFormats", "DICOM"} (case-insensitive).
54
+ Must be one of {"OpenSlide", "BioFormats", "DICOM", "h5path"} (case-insensitive).
55
+ Note that for supported image formats, OpenSlide performance can be significantly better than BioFormats.
56
+ Consider specifying ``backend = "openslide"`` when possible.
59
57
If ``None``, and a ``filepath`` is provided, tries to infer the correct backend from the file extension.
58
+ Defaults to ``None``.
60
59
slide_type (pathml.core.SlideType, optional): slide type specification. Must be a
61
60
:class:`~pathml.core.SlideType` object. Alternatively, slide type can be specified by using the
62
61
parameters ``stain``, ``tma``, ``rgb``, ``volumetric``, and ``time_series``.
@@ -121,8 +120,8 @@ def __init__(
121
120
), f"slide_type is of type { type (slide_type )} but must be of type pathml.core.types.SlideType"
122
121
assert backend is None or (
123
122
isinstance (backend , str )
124
- and backend .lower () in {"openslide" , "bioformats" , "dicom" }
125
- ), f"backend { backend } must be one of ['OpenSlide', 'BioFormats', 'DICOM'] (case-insensitive)."
123
+ and backend .lower () in {"openslide" , "bioformats" , "dicom" , "h5path" }
124
+ ), f"backend { backend } must be one of ['OpenSlide', 'BioFormats', 'DICOM', 'h5path' ] (case-insensitive)."
126
125
assert counts is None or isinstance (
127
126
counts , anndata .AnnData
128
127
), f"counts is if type { type (counts )} but must be of type anndata.AnnData"
@@ -146,7 +145,7 @@ def __init__(
146
145
147
146
# get name from filepath if no name is provided
148
147
if name is None and filepath is not None :
149
- name = Path (filepath ).stem
148
+ name = Path (filepath ).name
150
149
151
150
_load_from_h5path = False
152
151
@@ -155,21 +154,9 @@ def __init__(
155
154
backend = backend .lower ()
156
155
else :
157
156
# try to infer the correct backend
158
- ext = get_file_ext (filepath )
159
- if ext in openslideext :
160
- backend = "openslide"
161
- elif ext in bioformatsext :
162
- backend = "bioformats"
163
- elif ext in dicomext :
164
- backend = "dicom"
165
- elif ext in pathmlext :
166
- backend = "h5path"
167
- # load SlideData from h5 or h5path
157
+ backend = infer_backend (filepath )
158
+ if backend == "h5path" :
168
159
_load_from_h5path = True
169
- else :
170
- raise ValueError (
171
- f"Backend not specified, but cannot infer correct backend from input path { filepath } "
172
- )
173
160
174
161
if backend .lower () == "openslide" :
175
162
backend_obj = pathml .core .OpenSlideBackend (filepath )
0 commit comments