Skip to content

Commit df49ac1

Browse files
Merge pull request #270 from Dana-Farber-AIOS/dev
v2.0.2
2 parents a52f632 + e66a1d7 commit df49ac1

File tree

10 files changed

+107
-64
lines changed

10 files changed

+107
-64
lines changed

.github/workflows/tests-conda.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,5 @@ jobs:
7474
shell: bash -l {0}
7575
run: |
7676
cd docs
77+
pip install -r readthedocs-requirements.txt
7778
make html

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ COPY tests/ /opt/pathml/tests
4444

4545
# install pathml and deepcell
4646
RUN pip3 install --upgrade pip \
47-
&& pip3 install numpy==1.19.5 \
47+
&& pip3 install numpy==1.19.5 spams \
4848
&& pip3 install python-bioformats==4.0.0 deepcell /opt/pathml/ pytest
4949

5050
# run tests to verify container

docs/readthedocs-requirements.txt

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
sphinx>=3.4.3
2-
nbsphinx>=0.8.1
3-
nbsphinx-link>=1.3.0
4-
sphinx-rtd-theme>=0.5.1
5-
sphinx-autoapi
6-
Ipython
7-
sphinx-copybutton
1+
sphinx==4.3.2
2+
nbsphinx==0.8.8
3+
nbsphinx-link==1.3.0
4+
sphinx-rtd-theme==1.0.0
5+
sphinx-autoapi==1.8.4
6+
ipython==7.30.1
7+
sphinx-copybutton==0.4.0

environment.yml

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,30 +8,24 @@ dependencies:
88
- pip==21.2.2
99
- python==3.8
1010
- numpy==1.19.5
11-
- scipy==1.7.1
11+
- scipy==1.7.3
1212
- scikit-image==0.18.3
13-
- matplotlib==3.1.3
13+
- matplotlib==3.5.1
1414
- python-spams==2.6.1
1515
- openjdk==8.0.152
16-
- pytorch==1.9.0
16+
- pytorch==1.10.1
1717
- h5py==3.1.0
18-
- dask==2021.7.1
19-
- pydicom==2.1.2
18+
- dask==2021.12.0
19+
- pydicom==2.2.2
2020
- pytest==6.2.5
21-
- pre-commit==2.13.0
21+
- pre-commit==2.16.0
2222
- coverage==5.5
2323
- pip:
24+
- python-bioformats==4.0.0
25+
- python-javabridge==4.0.0
26+
- deepcell==0.11.0
2427
- opencv-contrib-python==4.5.3.56
2528
- openslide-python==1.1.2
26-
- javabridge==1.0.19
27-
- python-bioformats==4.0.0
28-
- scanpy==1.7.2
29-
- anndata==0.7.6
30-
- ipython==7.27.0
31-
- sphinx==4.2.0
32-
- nbsphinx==0.8.7
33-
- nbsphinx-link==1.3.0
34-
- sphinx-rtd-theme==1.0.0
35-
- sphinx-autoapi==1.8.4
36-
- sphinx-copybutton==0.4.0
37-
- tqdm
29+
- scanpy==1.8.2
30+
- anndata==0.7.8
31+
- tqdm==4.62.3

pathml/core/slide_backends.py

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,13 @@
55

66
from io import BytesIO
77
from typing import Tuple
8+
89
import numpy as np
910
import openslide
11+
import pathml.core
12+
import pathml.core.tile
13+
from javabridge.jutil import JavaException
14+
from pathml.utils import pil_to_rgb
1015
from PIL import Image
1116
from pydicom.dataset import Dataset
1217
from pydicom.encaps import get_frame_offsets
@@ -15,11 +20,6 @@
1520
from pydicom.tag import SequenceDelimiterTag, TupleTag
1621
from pydicom.uid import UID
1722
from scipy.ndimage import zoom
18-
from javabridge.jutil import JavaException
19-
20-
import pathml.core
21-
import pathml.core.tile
22-
from pathml.utils import pil_to_rgb
2323

2424
try:
2525
import bioformats
@@ -310,7 +310,7 @@ def get_image_shape(self, level=None):
310310
), f"input level {level} invalid for slide with {self.level_count} levels total"
311311
return self.shape_list[level][:2]
312312

313-
def extract_region(self, location, size, level=0):
313+
def extract_region(self, location, size, level=0, series_as_channels=False):
314314
"""
315315
Extract a region of the image. All bioformats images have 5 dimensions representing
316316
(x, y, z, channel, time). Even if an image does not have multiple z-series or time-series,
@@ -323,14 +323,12 @@ def extract_region(self, location, size, level=0):
323323
size (Tuple[int, int, ...]): (X,Y) size of each region. If an integer is passed, will convert to a
324324
tuple of (H, W) and extract a square region. If a tuple with len < 5 is passed, missing
325325
dimensions will be retrieved in full.
326-
level (int): level from which to extract chunks. Level 0 is highest resolution.
326+
level (int): level from which to extract chunks. Level 0 is highest resolution. Defaults to 0.
327+
series_as_channels (bool): Whether to treat image series as channels. If ``True``, multi-level images
328+
are not supported. Defaults to ``False``.
327329
328330
Returns:
329-
np.ndarray: image at the specified region
330-
331-
Example:
332-
Extract 2000x2000 x,y region from upper left corner of 7 channel, 2d fluorescent image.
333-
data.slide.extract_region(location = (0,0), size = 2000)
331+
np.ndarray: image at the specified region. 5-D array of (x, y, z, c, t)
334332
"""
335333
if level is None:
336334
level = 0
@@ -359,6 +357,11 @@ def extract_region(self, location, size, level=0):
359357
raise ValueError(
360358
f"input size {size} invalid. Must be a tuple of integer coordinates of len<2"
361359
)
360+
if series_as_channels:
361+
assert (
362+
level == 0
363+
), f"Multi-level images not supported with series_as_channels=True. Input 'level={level}' invalid. Use 'level=0'."
364+
362365
javabridge.start_vm(class_path=bioformats.JARS, max_heap_size="100G")
363366
with bioformats.ImageReader(str(self.filename), perform_init=True) as reader:
364367
# expand size
@@ -370,32 +373,35 @@ def extract_region(self, location, size, level=0):
370373
arrayshape = tuple(arrayshape)
371374
array = np.empty(arrayshape)
372375

376+
# read a very small (2x2) region to check whether the reader returns 2-D or 3-D arrays (see the len(sample.shape) checks below)
373377
sample = reader.read(
374378
z=0,
375379
t=0,
376380
series=level,
377381
rescale=False,
378-
XYWH=(location[0], location[1], size[0], size[1]),
382+
XYWH=(location[0], location[1], 2, 2),
379383
)
380384

381-
if len(sample.shape) == 2:
385+
# need this part because some facilities output images where the channels are incorrectly stored as series
386+
# in this case we pull the image for each series, then stack them together as channels
387+
if series_as_channels:
382388
for z in range(self.shape_list[level][2]):
383389
for c in range(self.shape_list[level][3]):
384390
for t in range(self.shape_list[level][4]):
385391
slicearray = reader.read(
386392
z=z,
387393
t=t,
388-
series=level,
394+
series=c,
389395
rescale=False,
390396
XYWH=(location[0], location[1], size[0], size[1]),
391397
)
392398
slicearray = np.asarray(slicearray)
393399
# some file formats read x, y out of order, transpose
394-
if slicearray.shape[:2] != array.shape[:2]:
395-
slicearray = np.transpose(slicearray)
400+
slicearray = np.transpose(slicearray)
396401
array[:, :, z, c, t] = slicearray
397-
# if series is set to read all channels, read all c simultaneously
398-
elif len(sample.shape) == 3:
402+
403+
# in this case, channels are correctly stored as channels, and we can support multi-level images as series
404+
else:
399405
for z in range(self.shape_list[level][2]):
400406
for t in range(self.shape_list[level][4]):
401407
slicearray = reader.read(
@@ -409,10 +415,13 @@ def extract_region(self, location, size, level=0):
409415
# some file formats read x, y out of order, transpose
410416
if slicearray.shape[:2] != array.shape[:2]:
411417
slicearray = np.transpose(slicearray)
412-
slicearray = np.moveaxis(slicearray, 0, -1)
413-
array[:, :, z, :, t] = slicearray
414-
else:
415-
raise Exception("image format not supported")
418+
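# on a 2-D slice, np.moveaxis(..., 0, -1) is itself a transpose and would undo the one above, so only move the channel axis last when the slice is 3-D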
419+
if len(sample.shape) == 3:
420+
slicearray = np.moveaxis(slicearray, 0, -1)
421+
if len(sample.shape) == 3:
422+
array[:, :, z, :, t] = slicearray
423+
else:
424+
array[:, :, z, level, t] = slicearray
416425

417426
array = array.astype(np.uint8)
418427
return array
@@ -448,7 +457,7 @@ def get_thumbnail(self, size=None):
448457
image_array = zoom(array, ratio)
449458
return image_array
450459

451-
def generate_tiles(self, shape=3000, stride=None, pad=False, level=0):
460+
def generate_tiles(self, shape=3000, stride=None, pad=False, level=0, **kwargs):
452461
"""
453462
Generator over tiles.
454463
@@ -511,7 +520,7 @@ def generate_tiles(self, shape=3000, stride=None, pad=False, level=0):
511520
if coords[0] + shape[0] < i and coords[1] + shape[1] < j:
512521
# get image for tile
513522
tile_im = self.extract_region(
514-
location=coords, size=shape, level=level
523+
location=coords, size=shape, level=level, **kwargs
515524
)
516525
yield pathml.core.tile.Tile(image=tile_im, coords=coords)
517526
else:
@@ -520,7 +529,7 @@ def generate_tiles(self, shape=3000, stride=None, pad=False, level=0):
520529
j - coords[1] if coords[1] + shape[1] > j else shape[1],
521530
)
522531
tile_im = self.extract_region(
523-
location=coords, size=unpaddedshape, level=level
532+
location=coords, size=unpaddedshape, level=level, **kwargs
524533
)
525534
zeroarrayshape = list(tile_im.shape)
526535
zeroarrayshape[0], zeroarrayshape[1] = (

pathml/core/slide_data.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ def run(
257257
tile_pad=False,
258258
overwrite_existing_tiles=False,
259259
write_dir=None,
260+
**kwargs,
260261
):
261262
"""
262263
Run a preprocessing pipeline on SlideData.
@@ -317,7 +318,11 @@ def run(
317318
processed_tile_futures = []
318319

319320
for tile in self.generate_tiles(
320-
level=level, shape=tile_size, stride=tile_stride, pad=tile_pad
321+
level=level,
322+
shape=tile_size,
323+
stride=tile_stride,
324+
pad=tile_pad,
325+
**kwargs,
321326
):
322327
if not tile.slide_type:
323328
tile.slide_type = self.slide_type
@@ -338,7 +343,11 @@ def run(
338343

339344
else:
340345
for tile in self.generate_tiles(
341-
level=level, shape=tile_size, stride=tile_stride, pad=tile_pad
346+
level=level,
347+
shape=tile_size,
348+
stride=tile_stride,
349+
pad=tile_pad,
350+
**kwargs,
342351
):
343352
if not tile.slide_type:
344353
tile.slide_type = self.slide_type

pathml/preprocessing/transforms.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,15 @@
1212
import pandas as pd
1313
import pathml.core
1414
import pathml.core.slide_data
15-
import spams
16-
from pathml.utils import (RGB_to_GREY, RGB_to_HSI, RGB_to_HSV, RGB_to_OD,
17-
normalize_matrix_cols)
15+
from pathml.utils import (
16+
RGB_to_GREY,
17+
RGB_to_HSI,
18+
RGB_to_HSV,
19+
RGB_to_OD,
20+
normalize_matrix_cols,
21+
)
1822
from skimage import restoration
19-
from skimage.exposure import (equalize_adapthist, equalize_hist,
20-
rescale_intensity)
23+
from skimage.exposure import equalize_adapthist, equalize_hist, rescale_intensity
2124
from skimage.measure import regionprops_table
2225

2326

@@ -271,7 +274,10 @@ def F(self, image):
271274
image.ndim == 2
272275
), f"input image has shape {image.shape}. Must convert to 1-channel image (H, W)."
273276
_, out = cv2.threshold(
274-
src=image, thresh=self.threshold, maxval=self.max_value, type=self.type,
277+
src=image,
278+
thresh=self.threshold,
279+
maxval=self.max_value,
280+
type=self.type,
275281
)
276282
return out.astype(np.uint8)
277283

@@ -597,6 +603,10 @@ class StainNormalizationHE(Transform):
597603
Default can be used, or you can also fit to a reference slide of your choosing by calling
598604
:meth:`~pathml.preprocessing.transforms.StainNormalizationHE.fit_to_reference`.
599605
606+
Note:
607+
If using ``stain_estimation_method = "Vahadane"``, `spams <http://thoth.inrialpes.fr/people/mairal/spams/>`_
608+
must be installed, along with all of its dependencies (i.e. libblas & liblapack).
609+
600610
References:
601611
Macenko, M., Niethammer, M., Marron, J.S., Borland, D., Woosley, J.T., Guan, X., Schmitt, C. and Thomas, N.E.,
602612
2009, June. A method for normalizing histology slides for quantitative analysis. In 2009 IEEE International
@@ -635,6 +645,14 @@ def __init__(
635645
0 <= background_intensity <= 255
636646
), f"Error: input background intensity {background_intensity} must be an integer between 0 and 255"
637647

648+
if stain_estimation_method.lower() == "vahadane":
649+
try:
650+
import spams
651+
except (ImportError, ModuleNotFoundError):
652+
raise Exception(
653+
"Vahadane method requires `spams` package to be installed"
654+
)
655+
638656
self.target = target.lower()
639657
self.stain_estimation_method = stain_estimation_method.lower()
640658
self.optical_density_threshold = optical_density_threshold
@@ -723,6 +741,10 @@ def _estimate_stain_vectors_vahadane(self, image, random_seed=0):
723741
Args:
724742
image (np.ndarray): RGB image
725743
"""
744+
try:
745+
import spams
746+
except (ImportError, ModuleNotFoundError):
747+
raise Exception("Vahadane method requires `spams` package to be installed")
726748
# convert to Optical Density (OD) space
727749
image_OD = RGB_to_OD(image)
728750
# reshape to (M*N)x3
@@ -823,6 +845,10 @@ def _estimate_pixel_concentrations_lasso(self, image, stain_matrix):
823845
stain_matrix (np.ndarray): matrix of H and E stain vectors in optical density (OD) space.
824846
Stain_matrix is (3, 2) and first column corresponds to hematoxylin by convention.
825847
"""
848+
try:
849+
import spams
850+
except (ImportError, ModuleNotFoundError):
851+
raise Exception("Vahadane method requires `spams` package to be installed")
826852
image_OD = RGB_to_OD(image).reshape(-1, 3)
827853

828854
# Get concentrations of each stain at each pixel

setup.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@
3232
"openslide-python",
3333
"pydicom",
3434
"h5py",
35-
"spams",
3635
"scikit-learn",
3736
"dask[distributed]",
3837
"anndata>=0.7.6",

tests/core_tests/test_slide_backends.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,14 @@ def dicom_backend():
3030

3131

3232
@pytest.mark.parametrize(
33-
"backend", [openslide_backend(), bioformats_backend(), bioformats_backend_qptiff()]
33+
"backend",
34+
[
35+
openslide_backend(),
36+
bioformats_backend(),
37+
bioformats_backend_qptiff(),
38+
],
3439
)
35-
@pytest.mark.parametrize("location", [(0, 0), (50, 100)])
40+
@pytest.mark.parametrize("location", [(0, 0), (50, 60)])
3641
@pytest.mark.parametrize("size", [50, (50, 100)])
3742
@pytest.mark.parametrize("level", [None, 0])
3843
def test_extract_region(backend, location, size, level):

tests/test_manuscript_urls.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
[
1313
"https://www.pathml.org",
1414
# Vignettes
15-
# "https://github.com/Dana-Farber-AIOS/pathml/tree/master/examples/vignettes/",
15+
"https://github.com/Dana-Farber-AIOS/pathml/tree/master/examples/vignettes/",
1616
# docs
1717
"https://pathml.readthedocs.io/en/latest/",
1818
],

0 commit comments

Comments
 (0)