feat(inference): multi-gpu inference support

okunator · okunator · commit 48c0a0ad25f1 · 2022-10-18T18:49:38.000+03:00
diff --git a/cellseg_models_pytorch/inference/_base_inferer.py b/cellseg_models_pytorch/inference/_base_inferer.py
@@ -7,6 +7,7 @@
 import numpy as np
 import torch
 import torch.nn as nn
+import yaml
 from pathos.multiprocessing import ThreadPool as Pool
 from torch.utils.data import DataLoader
 from tqdm import tqdm
@@ -31,6 +32,7 @@ def __init__(
         batch_size: int = 8,
         normalization: str = None,
         device: str = "cuda",
+        n_devices: int = 1,
         save_masks: bool = True,
         save_intermediate: bool = False,
         save_dir: Union[Path, str] = None,
@@ -72,6 +74,9 @@ def __init__(
                 One of: "dataset", "minmax", "norm", "percentile", None.
             device : str, default="cuda"
                 The device of the input and model. One of: "cuda", "cpu"
+            n_devices : int, default=1
+                Number of GPUs used for inference. Only used if `device="cuda"`.
+                If > 1, the model is replicated on them via `nn.DataParallel`.
             save_masks : bool, default=True
                 If True, the resulting segmentation masks will be saved into `out_masks`
                 variable.
@@ -95,6 +100,16 @@ def __init__(
             **postproc_kwargs:
                 Arbitrary keyword arguments for the post-processing.
         """
+        # basic inits
+        self.model = model
+        self.out_heads = self._get_out_info()  # the names and num channels of out heads
+        self.batch_size = batch_size
+        self.patch_size = patch_size
+        self.padding = padding
+        self.out_activations = out_activations
+        self.out_boundary_weights = out_boundary_weights
+        self.head_kwargs = self._check_and_set_head_args()
+
         self.save_dir = Path(save_dir) if save_dir is not None else None
         self.save_masks = save_masks
         self.save_intermediate = save_intermediate
@@ -106,17 +121,17 @@ def __init__(
             folder_ds, batch_size=batch_size, shuffle=False, pin_memory=True
         )
 
-        # model and device
-        self.model = model
-        if device == "cpu":
-            self.model.cpu()
-            self.device = torch.device("cpu")
-        if torch.cuda.is_available() and device == "cuda":
-            self.model.cuda()
-            self.device = torch.device("cuda")
-
-        self.model.eval()
+        # Set post processor
+        self.postprocessor = PostProcessor(
+            instance_postproc,
+            inst_key=self.model.inst_key,
+            aux_key=self.model.aux_key,
+            type_post_proc=type_post_proc,
+            sem_post_proc=sem_post_proc,
+            **postproc_kwargs,
+        )
 
+        # load weights and set devices
         if checkpoint_path is not None:
             ckpt = torch.load(
                 checkpoint_path, map_location=lambda storage, loc: storage
@@ -130,30 +145,41 @@ def __init__(
             except BaseException as e:
                 print(e)
 
-        #
+        assert device in ("cuda", "cpu")
+        if device == "cpu":
+            self.device = torch.device("cpu")
+        if torch.cuda.is_available() and device == "cuda":
+            self.device = torch.device("cuda")
+
+            if torch.cuda.device_count() > 1 and n_devices > 1:
+                self.model = nn.DataParallel(self.model, device_ids=range(n_devices))
+
+        self.model.to(self.device)
+        self.model.eval()
+
+        # Helper class to perform forward + extra processing
         self.predictor = Predictor(
             model=self.model,
             patch_size=patch_size,
             normalization=normalization,
             device=self.device,
         )
-        self.out_heads = self._get_out_info()  # the names and num channels of out heads
-        self.batch_size = batch_size
-        self.patch_size = patch_size
-        self.padding = padding
-        self.out_activations = out_activations
-        self.out_boundary_weights = out_boundary_weights
-        self.head_kwargs = self._check_and_set_head_args()
 
-        #
-        self.postprocessor = PostProcessor(
-            instance_postproc,
-            inst_key=self.model.inst_key,
-            aux_key=self.model.aux_key,
-            type_post_proc=type_post_proc,
-            sem_post_proc=sem_post_proc,
-            **postproc_kwargs,
-        )
+    @classmethod
+    def from_yaml(cls, model: nn.Module, yaml_path: str):
+        """Initialize the inferer from a yaml-file.
+
+        Parameters
+        ----------
+            model : nn.Module
+                Initialized segmentation model.
+            yaml_path : str
+                Path to the yaml file containing rest of the params
+        """
+        with open(yaml_path, "r") as stream:
+            kwargs = yaml.full_load(stream)  # NOTE(review): consider safe_load if untrusted
+
+        return cls(model, **kwargs)  # yaml keys are forwarded as constructor kwargs
 
     @abstractmethod
     def _infer_batch(self):
diff --git a/cellseg_models_pytorch/inference/predictor.py b/cellseg_models_pytorch/inference/predictor.py
@@ -184,11 +184,15 @@ def classify(
         if apply_weights:
             # work out the tensor shape first for the weight mat
             B, C = patch.shape[:2]
-            W = torch.repeat_interleave(
-                self.weight_mat,
-                dim=1,
-                repeats=C,
-            ).repeat_interleave(repeats=B, dim=0)
+            W = (
+                torch.repeat_interleave(
+                    self.weight_mat,
+                    dim=1,
+                    repeats=C,
+                )
+                .repeat_interleave(repeats=B, dim=0)
+                .to(patch.device)
+            )
             patch *= W
 
         # apply classification activation
diff --git a/cellseg_models_pytorch/inference/resize_inferer.py b/cellseg_models_pytorch/inference/resize_inferer.py
@@ -21,6 +21,7 @@ def __init__(
         batch_size: int = 8,
         normalization: str = None,
         device: str = "cuda",
+        n_devices: int = 1,
         save_masks: bool = True,
         save_intermediate: bool = False,
         save_dir: Union[Path, str] = None,
@@ -69,6 +70,9 @@ def __init__(
                 "minmax", "norm", "percentile", None.
             device : str, default="cuda"
                 The device of the input and model. One of: "cuda", "cpu"
+            n_devices : int, default=1
+                Number of GPUs used for inference. Only used if `device="cuda"`.
+                If > 1, the model is replicated on them via `nn.DataParallel`.
             save_masks : bool, default=True
                 If True, the resulting segmentation masks will be saved into `out_masks`
                 variable.
@@ -103,6 +107,7 @@ def __init__(
             normalization=normalization,
             instance_postproc=instance_postproc,
             device=device,
+            n_devices=n_devices,
             save_masks=save_masks,
             save_intermediate=save_intermediate,
             save_dir=save_dir,
diff --git a/cellseg_models_pytorch/inference/sliding_window_inferer.py b/cellseg_models_pytorch/inference/sliding_window_inferer.py
@@ -24,6 +24,7 @@ def __init__(
         batch_size: int = 8,
         normalization: str = None,
         device: str = "cuda",
+        n_devices: int = 1,
         save_masks: bool = True,
         save_intermediate: bool = False,
         save_dir: Union[Path, str] = None,
@@ -71,6 +72,9 @@ def __init__(
                 "minmax", "norm", "percentile", None.
             device : str, default="cuda"
                 The device of the input and model. One of: "cuda", "cpu"
+            n_devices : int, default=1
+                Number of GPUs used for inference. Only used if `device="cuda"`.
+                If > 1, the model is replicated on them via `nn.DataParallel`.
             save_masks : bool, default=True
                 If True, the resulting segmentation masks will be saved into `out_masks`
                 variable.
@@ -110,6 +114,7 @@ def __init__(
             save_dir=save_dir,
             checkpoint_path=checkpoint_path,
             n_images=n_images,
+            n_devices=n_devices,
             type_post_proc=type_post_proc,
             sem_post_proc=sem_post_proc,
             **postproc_kwargs,
diff --git a/changelog.d/20221018_184815_oskari.lehtonen.md b/changelog.d/20221018_184815_oskari.lehtonen.md
@@ -0,0 +1,3 @@
+## Features
+
+- Add multi-gpu inference via DataParallel

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+## Features`
	`2`	`+`
	`3`	`+- Add multi-gpu inference via DataParallel`