mahmoodlab
diff --git a/README.md b/README.md
Lines changed: 1 addition & 0 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 3 additions & 2 deletions
diff --git a/run_batch_of_slides.py b/run_batch_of_slides.py
Lines changed: 29 additions & 11 deletions
diff --git a/run_single_slide.py b/run_single_slide.py
Lines changed: 4 additions & 4 deletions
diff --git a/tests/test_encoder_same_local_hf.py b/tests/test_encoder_same_local_hf.py
Lines changed: 118 additions & 0 deletions
diff --git a/tests/test_openslidewsi.py b/tests/test_openslidewsi.py
Lines changed: 6 additions & 6 deletions
diff --git a/tests/test_patch_encoders.py b/tests/test_patch_encoders.py
Lines changed: 1 addition & 1 deletion
diff --git a/tests/test_processor.py b/tests/test_processor.py
Lines changed: 3 additions & 2 deletions
diff --git a/tests/test_segmentation_models.py b/tests/test_segmentation_models.py
Lines changed: 4 additions & 3 deletions
diff --git a/tests/test_slide_encoders.py b/tests/test_slide_encoders.py
Lines changed: 0 additions & 2 deletions
diff --git a/trident/Converter.py b/trident/Converter.py
Lines changed: 3 additions & 0 deletions
@@ -19,6 +19,7 @@ This project was developed by the [Mahmood Lab](https://faisal.ai/) at Harvard M
 - **Slide Feature Extraction**: Extract slide embeddings from 5+ slide foundation models, including [Threads](https://arxiv.org/abs/2501.16652) (coming soon!), [Titan](https://arxiv.org/abs/2411.19666), and [GigaPath](https://www.nature.com/articles/s41586-024-07441-w). 
 
 ### Updates:
+- 04.25: Native support for PIL.Image and CuCIM (use `wsi = load_wsi(slide_path="xxx.svs")`). Support for tissue segmentation and patch encoding without Internet access.
 - 04.25: Remove artifacts from the tissue segmentation with `--remove_artifacts`. Works well for H&E.  
 - 02.25: New image converter from `czi`, `png`, etc., to `tiff`.
 - 02.25: Support for [GrandQC](https://www.nature.com/articles/s41467-024-54769-y) tissue vs. background segmentation.
 
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "trident"
-version = "0.0.5"
+version = "0.1.0"
 description = "A package for preprocessing whole-slide images."
 authors = [
     "Andrew Zhang <andrewzh@mit.edu>",
@@ -43,6 +43,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry.package]
 include = [
     { format = "file", path = "trident/slide_encoder_models/local_ckpts.json" },
-    { format = "file", path = "trident/patch_encoder_models/local_ckpts.json" }
+    { format = "file", path = "trident/patch_encoder_models/local_ckpts.json" },
+    { format = "file", path = "trident/segmentation_models/local_ckpts.json" },
 ]
 include_package_data = true
@@ -10,6 +10,7 @@
 import argparse
 import torch
 from trident import Processor
+from trident import WSIReaderType
 
 
 def parse_arguments():
@@ -23,6 +24,15 @@ def parse_arguments():
                         choices=['cache', 'seg', 'coords', 'feat', 'all'], 
                         help='Task to run: cache, seg (segmentation), coords (save tissue coordinates), img (save tissue images), feat (extract features)')
     parser.add_argument('--job_dir', type=str, required=True, help='Directory to store outputs')
+    parser.add_argument('--wsi_cache', type=str, default=None, 
+                        help='Directory to copy slides to for local processing')
+    parser.add_argument('--clear_cache', action='store_true', default=False, 
+                        help='Delete slides from cache after processing')
+    parser.add_argument('--skip_errors', action='store_true', default=False, 
+                        help='Skip errored slides and continue processing')
+    parser.add_argument('--max_workers', type=int, default=None, help='Maximum number of workers. Set to 0 to use main process.')
+
+    # Slide-related arguments
     parser.add_argument('--wsi_dir', type=str, required=True, 
                         help='Directory containing WSI files (no nesting allowed)')
     parser.add_argument('--wsi_ext', type=str, nargs='+', default=None, 
@@ -31,12 +41,9 @@ def parse_arguments():
                     help='Custom keys used to store the resolution as MPP (micron per pixel) in your list of whole-slide image.')
     parser.add_argument('--custom_list_of_wsis', type=str, default=None,
                     help='Custom list of WSIs specified in a csv file.')
-    parser.add_argument('--wsi_cache', type=str, default=None, 
-                        help='Directory to copy slides to for local processing')
-    parser.add_argument('--clear_cache', action='store_true', default=False, 
-                        help='Delete slides from cache after processing')
-    parser.add_argument('--skip_errors', action='store_true', default=False, 
-                        help='Skip errored slides and continue processing')
+    parser.add_argument('--reader_type', type=str, choices=['openslide', 'image', 'cucim'], default=None,
+                    help='Force the use of a specific WSI image reader. Options are ["openslide", "image", "cucim"]. Defaults to None (auto-determine which reader to use).')
+    
     # Segmentation arguments 
     parser.add_argument('--segmenter', type=str, default='hest', 
                         choices=['hest', 'grandqc'], 
@@ -66,6 +73,16 @@ def parse_arguments():
                                  'kaiko-vits8', 'kaiko-vits16', 'kaiko-vitb8', 'kaiko-vitb16',
                                  'kaiko-vitl14', 'lunit-vits8'],
                         help='Patch encoder to use')
+    parser.add_argument(
+        '--patch_encoder_ckpt_path', type=str, default=None,
+        help=(
+            "Optional local path to a patch encoder checkpoint (.pt, .pth, .bin, or .safetensors). "
+            "This is only needed in offline environments (e.g., compute clusters without internet). "
+            "If not provided, models are downloaded automatically from Hugging Face. "
+            "You can also specify local paths via the model registry at "
+            "`./trident/patch_encoder_models/local_ckpts.json`."
+        )
+    )
     parser.add_argument('--slide_encoder', type=str, default=None, 
                         choices=['threads', 'titan', 'prism', 'gigapath', 'chief', 'madeleine',
                                  'mean-virchow', 'mean-virchow2', 'mean-conch_v1', 'mean-conch_v15', 'mean-ctranspath',
@@ -89,7 +106,9 @@ def initialize_processor(args):
         clear_cache=args.clear_cache,
         skip_errors=args.skip_errors,
         custom_mpp_keys=args.custom_mpp_keys,
-        custom_list_of_wsis=args.custom_list_of_wsis
+        custom_list_of_wsis=args.custom_list_of_wsis,
+        max_workers=args.max_workers,
+        reader_type=args.reader_type
     )
 
 def run_task(processor, args):
@@ -107,12 +126,10 @@ def run_task(processor, args):
         segmentation_model = segmentation_model_factory(
             args.segmenter,
             confidence_thresh=args.seg_conf_thresh,
-            device=f'cuda:{args.gpu}'
         )
         if args.remove_artifacts:
             artifact_remover_model = segmentation_model_factory(
                 'grandqc_artifact',
-                device=f'cuda:{args.gpu}'
             )
         else:
             artifact_remover_model = None
@@ -122,7 +139,8 @@ def run_task(processor, args):
             segmentation_model,
             seg_mag=segmentation_model.target_mag,
             holes_are_tissue= not args.remove_holes,
-            artifact_remover_model=artifact_remover_model
+            artifact_remover_model=artifact_remover_model,
+            device=f'cuda:{args.gpu}',
         )
     elif args.task == 'coords':
         # Minimal example for tissue patching:
@@ -139,7 +157,7 @@ def run_task(processor, args):
             # Minimal example for feature extraction:
             # python run_batch_of_slides.py --task feat --wsi_dir wsis --job_dir trident_processed --patch_encoder uni_v1 --mag 20 --patch_size 256
             from trident.patch_encoder_models.load import encoder_factory
-            encoder = encoder_factory(args.patch_encoder)
+            encoder = encoder_factory(args.patch_encoder, weights_path=args.patch_encoder_ckpt_path)
             processor.run_patch_feature_extraction_job(
                 coords_dir=args.coords_dir or f'{args.mag}x_{args.patch_size}px_{args.overlap}px_overlap',
                 patch_encoder=encoder,
 
@@ -9,7 +9,7 @@
 import argparse
 import os
 
-from trident import OpenSlideWSI
+from trident import load_wsi
 from trident.segmentation_models import segmentation_model_factory
 from trident.patch_encoder_models import encoder_factory
 
@@ -53,19 +53,19 @@ def process_slide(args):
 
     # Initialize the WSI
     print(f"Processing slide: {args.slide_path}")
-    slide = OpenSlideWSI(slide_path=args.slide_path, lazy_init=False, custom_mpp_keys=args.custom_mpp_keys)
+    slide = load_wsi(slide_path=args.slide_path, lazy_init=False, custom_mpp_keys=args.custom_mpp_keys)
 
     # Step 1: Tissue Segmentation
     print("Running tissue segmentation...")
     segmentation_model = segmentation_model_factory(
         model_name=args.segmenter,
         confidence_thresh=args.seg_conf_thresh,
-        device=f"cuda:{args.gpu}"
     )
     slide.segment_tissue(
         segmentation_model=segmentation_model,
         target_mag=segmentation_model.target_mag,
-        job_dir=args.job_dir
+        job_dir=args.job_dir,
+        device=f"cuda:{args.gpu}"
     )
     print(f"Tissue segmentation completed. Results saved to {args.job_dir}contours_geojson and {args.job_dir}contours")
 
 
@@ -0,0 +1,118 @@
+import torch
+import numpy as np 
+from PIL import Image
+import unittest
+import json
+from pathlib import Path
+
+try:
+    import lovely_tensors; lovely_tensors.monkey_patch()
+except:
+    pass
+
+import sys; sys.path.append('../')
+from trident.patch_encoder_models import *
+
+
+class TestEncoderConsistency(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        cls.dummy_image = Image.fromarray(np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8))
+
+    def _load_encoder(self, encoder_name, source, weights_path=None, **kwargs):
+        print(f"  🔧 Loading {encoder_name} ({source})")
+        encoder = encoder_factory(encoder_name, weights_path=weights_path, **kwargs)
+        encoder = encoder.to(self.device)
+        encoder.eval()
+        return encoder
+
+    def _run_forward(self, encoder, encoder_name, source):
+        with torch.inference_mode(), torch.amp.autocast('cuda', dtype=encoder.precision):
+            dummy_input = encoder.eval_transforms(self.dummy_image).to(self.device).unsqueeze(dim=0)
+            output = encoder(dummy_input)
+        print(f"  📐 Output shape from {source}: {tuple(output.shape)}")
+        return output
+
+    def _compare_architecture(self, enc1, enc2):
+        keys1 = set(enc1.state_dict().keys())
+        keys2 = set(enc2.state_dict().keys())
+        if keys1 != keys2:
+            print("\033[1;33m⚠️ Architecture mismatch in keys:\033[0m")
+            print("  Only in default :", keys1 - keys2)
+            print("  Only in local   :", keys2 - keys1)
+            return False
+        return True
+
+    def _compare_weights(self, enc1, enc2):
+        diffs = []
+        for k in enc1.state_dict().keys():
+            w1 = enc1.state_dict()[k]
+            w2 = enc2.state_dict()[k]
+            if not torch.allclose(w1, w2, atol=1e-5, rtol=1e-4):
+                abs_diff = (w1 - w2).abs()
+                max_diff = abs_diff.max().item()
+                mean_diff = abs_diff.mean().item()
+                diffs.append((k, max_diff, mean_diff))
+        if diffs:
+            print("\033[1;33m⚠️ Weight differences found:\033[0m")
+            for k, max_d, mean_d in sorted(diffs, key=lambda x: -x[1])[:10]:
+                print(f"    🔍 {k:<50} max diff: {max_d:.4e}, mean diff: {mean_d:.4e}")
+            return False
+        return True
+
+
+def generate_encoder_test(encoder_name, weights_path, **kwargs):
+    def test(self):
+        header = f"🧪 TEST: {encoder_name}"
+        if kwargs:
+            kwarg_str = ', '.join(f"{k}={v}" for k, v in kwargs.items())
+            header += f" ({kwarg_str})"
+        print(f"\n\033[1;36m{'=' * len(header)}\n{header}\n{'=' * len(header)}\033[0m")
+
+        # Load models
+        enc_default = self._load_encoder(encoder_name, source="default", **kwargs)
+        enc_local = self._load_encoder(encoder_name, source="local checkpoint", weights_path=weights_path, **kwargs)
+
+        # # Compare architecture
+        # arch_match = self._compare_architecture(enc_default, enc_local)
+        # self.assertTrue(arch_match, f"Architecture mismatch in {encoder_name}")
+
+        # # Compare weights
+        # weights_match = self._compare_weights(enc_default, enc_local)
+        # self.assertTrue(weights_match, f"Weight mismatch in {encoder_name}")
+
+        # Run inference
+        out_default = self._run_forward(enc_default, encoder_name, source="default")
+        out_local = self._run_forward(enc_local, encoder_name, source="local checkpoint")
+
+        if torch.allclose(out_default, out_local, atol=1e-5, rtol=1e-4):
+            print(f"\033[1;32m✅ Outputs match for {encoder_name}\033[0m")
+        else:
+            diff = (out_default - out_local).abs().max().item()
+            print(f"\033[1;31m❌ Outputs do NOT match (max abs diff = {diff:.4e})\033[0m")
+            self.fail(f"Output mismatch for {encoder_name} with kwargs={kwargs}")
+    return test
+
+
+# Dynamically register tests before unittest.main()
+def register_tests():
+    ckpt_path = Path('../trident/patch_encoder_models/local_ckpts_guillaume.json')
+    with open(ckpt_path) as f:
+        local_ckpts = json.load(f)
+
+    # Local checkpoints are not supported for these encoders; exclude them from the generated tests.
+    local_ckpts.pop('musk')
+    local_ckpts.pop('custom_encoder')
+    local_ckpts.pop('hibou_l')
+
+    for encoder_name, path in local_ckpts.items():
+        test_name = f"test_{encoder_name}"
+        test_fn = generate_encoder_test(encoder_name, path)
+        setattr(TestEncoderConsistency, test_name, test_fn)
+
+
+register_tests()
+
+if __name__ == '__main__':
+    unittest.main()
@@ -3,7 +3,7 @@
 import torch  # Check for CUDA availability
 
 import sys; sys.path.append('../')
-from trident import OpenSlideWSI
+from trident import load_wsi
 from trident.segmentation_models import segmentation_model_factory
 from trident.patch_encoder_models import encoder_factory
 
@@ -19,8 +19,8 @@ class TestOpenSlideWSI(unittest.TestCase):
     HF_REPO = "MahmoodLab/unit-testing"
     TEST_SLIDE_FILENAMES = [
         "394140.svs",
-        # "TCGA-AN-A0XW-01Z-00-DX1.811E11E7-FA67-46BB-9BC6-1FD0106B789D.svs",
-        # "TCGA-B6-A0IJ-01Z-00-DX1.BF2E062F-06DA-4CA8-86C4-36674C035CAA.svs"
+        "TCGA-AN-A0XW-01Z-00-DX1.811E11E7-FA67-46BB-9BC6-1FD0106B789D.svs",
+        "TCGA-B6-A0IJ-01Z-00-DX1.BF2E062F-06DA-4CA8-86C4-36674C035CAA.svs"
     ]
     TEST_OUTPUT_DIR = "test_single_slide_processing/"
     TEST_PATCH_ENCODER = "uni_v1"
@@ -50,11 +50,11 @@ def test_integration(self):
         for slide_filename in self.TEST_SLIDE_FILENAMES:
             with self.subTest(slide=slide_filename):
                 slide_path = os.path.join(self.local_wsi_dir, slide_filename)
-                slide = OpenSlideWSI(slide_path=slide_path, lazy_init=False)
+                slide = load_wsi(slide_path=slide_path, lazy_init=False)
 
                 # Step 1: Tissue segmentation
-                segmentation_model = segmentation_model_factory("hest", device=self.TEST_DEVICE)
-                slide.segment_tissue(segmentation_model=segmentation_model, target_mag=10, job_dir=self.TEST_OUTPUT_DIR)
+                segmentation_model = segmentation_model_factory("hest")
+                slide.segment_tissue(segmentation_model=segmentation_model, target_mag=10, job_dir=self.TEST_OUTPUT_DIR, device=self.TEST_DEVICE)
 
                 # Step 2: Tissue coordinate extraction
                 coords_path = slide.extract_tissue_coords(
 
@@ -98,4 +98,4 @@ def test_lunitvits8_forward(self):
 
 
 if __name__ == '__main__':
-    unittest.main()
+    unittest.main()
@@ -86,10 +86,11 @@ def test_tissue_processing(self):
             wsi_ext=self.TEST_WSI_EXT
         )
 
-        segmentation_model = segmentation_model_factory('hest', device=f'cuda:{self.TEST_GPU_INDEX}')
+        segmentation_model = segmentation_model_factory('hest')
         self.processor.run_segmentation_job(
             segmentation_model=segmentation_model,
-            seg_mag=5
+            seg_mag=5,
+            device=f'cuda:{self.TEST_GPU_INDEX}'
         )
         output_dirs = ["contours", "contours_geojson"]
         for dir_name in output_dirs:
 
@@ -23,13 +23,13 @@ def setUp(self):
     def _test_forward(self, encoder_name):
         print("\033[95m" + f"Testing {encoder_name} forward pass" + "\033[0m")
         device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        encoder = segmentation_model_factory(encoder_name, device=device)
+        encoder = segmentation_model_factory(encoder_name).to(device)
 
         self.dummy_image = np.random.randint(0, 256, (encoder.input_size, encoder.input_size, 3), dtype=np.uint8)
         self.dummy_image = Image.fromarray(self.dummy_image)
 
         with torch.inference_mode():
-            dummy_input = encoder.eval_transforms(self.dummy_image).unsqueeze(dim=0)
+            dummy_input = encoder.eval_transforms(self.dummy_image).unsqueeze(dim=0).to(device)
             output = encoder(dummy_input)
 
         self.assertIsNotNone(output)
@@ -39,7 +39,8 @@ def _test_forward(self, encoder_name):
     def test_hest(self):
         self._test_forward('hest')
 
-    # Add more segmentation models here
+    def test_grandqc(self):
+        self._test_forward('grandqc')
 
 if __name__ == '__main__':
     unittest.main()
@@ -2,8 +2,6 @@
 import torch
 
 import sys; sys.path.append('../')
-
-# New imports to test 
 from trident.slide_encoder_models import *
 
 """
 
@@ -16,6 +16,9 @@
 # PIL
 PIL_EXTENSIONS = {'.png', '.jpg', '.jpeg'}
 
+# OpenSlide
+OPENSLIDE_EXTENSIONS = {'.svs', '.tif', '.dcm', '.vms', '.vmu', '.ndpi', '.scn', '.mrxs', '.tiff', '.svslide', '.bif', '.czi'}
+
 # Union of BIOFORMAT_EXTENSIONS and PIL_EXTENSIONS, plus '.czi'
 SUPPORTED_EXTENSIONS = BIOFORMAT_EXTENSIONS | PIL_EXTENSIONS | {'.czi'}
Original file line number	Diff line number	Diff line change
`@@ -98,4 +98,4 @@ def test_lunitvits8_forward(self):`
`98`	`98`
`99`	`99`
`100`	`100`	`if __name__ == '__main__':`
`101`		`- unittest.main()`
	`101`	`+ unittest.main()`
Original file line number	Diff line number	Diff line change
`@@ -86,10 +86,11 @@ def test_tissue_processing(self):`
`86`	`86`	`wsi_ext=self.TEST_WSI_EXT`
`87`	`87`	`)`
`88`	`88`
`89`		`- segmentation_model = segmentation_model_factory('hest', device=f'cuda:{self.TEST_GPU_INDEX}')`
	`89`	`+ segmentation_model = segmentation_model_factory('hest')`
`90`	`90`	`self.processor.run_segmentation_job(`
`91`	`91`	`segmentation_model=segmentation_model,`
`92`		`- seg_mag=5`
	`92`	`+ seg_mag=5,`
	`93`	`+ device=f'cuda:{self.TEST_GPU_INDEX}'`
`93`	`94`	`)`
`94`	`95`	`output_dirs = ["contours", "contours_geojson"]`
`95`	`96`	`for dir_name in output_dirs:`