🔒 Security Fix: Mitigate Remote Code Execution Risk in torch.load (#2729)

samet-akcay · web-flow · commit 754119aabf9a · 2025-06-04T09:56:26.000+01:00
* Security Fix: Mitigate Remote Code Execution Risk in torch.load

Signed-off-by: samet-akcay <samet.akcay@intel.com>

* Add semgrep ignore line

Signed-off-by: samet-akcay <samet.akcay@intel.com>

* Add env var to unit tests

Signed-off-by: samet-akcay <samet.akcay@intel.com>

* Add env var to integration tests

Signed-off-by: samet-akcay <samet.akcay@intel.com>

* add weights only to true explicitly in all usages

Signed-off-by: samet-akcay <samet.akcay@intel.com>

* add weights only to true explicitly in all usages

Signed-off-by: samet-akcay <samet.akcay@intel.com>

* Address torch.Tensor semgrep issue

Signed-off-by: samet-akcay <samet.akcay@intel.com>

---------

Signed-off-by: samet-akcay <samet.akcay@intel.com>
diff --git a/src/anomalib/callbacks/model_loader.py b/src/anomalib/callbacks/model_loader.py
@@ -81,4 +81,6 @@ def setup(self, trainer: Trainer, pl_module: AnomalibModule, stage: str | None =
         del trainer, stage  # These variables are not used.
 
         logger.info("Loading the model from %s", self.weights_path)
-        pl_module.load_state_dict(torch.load(self.weights_path, map_location=pl_module.device)["state_dict"])
+        pl_module.load_state_dict(
+            torch.load(self.weights_path, map_location=pl_module.device, weights_only=True)["state_dict"],
+        )
diff --git a/src/anomalib/deploy/inferencers/torch_inferencer.py b/src/anomalib/deploy/inferencers/torch_inferencer.py
@@ -3,6 +3,24 @@
 This module provides the PyTorch inferencer implementation for running inference
 with trained PyTorch models.
 
+.. warning::
+    This is a legacy inferencer. It is recommended to use :meth:`anomalib.engine.Engine.predict`
+    instead, which provides a more modern and feature-rich interface for model inference.
+
+.. danger::
+    **Security Notice**: PyTorch model loading uses Python's pickle module,
+    which can execute code from the checkpoint file. This is a standard PyTorch behavior,
+    not specific to this library. For security:
+
+    - Load models only from trusted sources
+    - Consider using safer formats like ONNX or TorchScript for production use
+
+    To proceed with loading, set:
+
+    .. code-block:: bash
+
+        export TRUST_REMOTE_CODE=1
+
 Example:
     Assume we have a PyTorch model saved as a ``.pt`` file:
 
@@ -36,6 +54,8 @@
 # Copyright (C) 2022-2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
+import logging
+import os
 from pathlib import Path
 
 import numpy as np
@@ -47,10 +67,30 @@
 from anomalib.data import ImageBatch
 from anomalib.data.utils import read_image
 
+logger = logging.getLogger(__name__)
+
 
 class TorchInferencer:
     """PyTorch inferencer for anomaly detection models.
 
+    .. warning::
+        This is a legacy inferencer. It is recommended to use :meth:`anomalib.engine.Engine.predict`
+        instead, which provides a more modern and feature-rich interface for model inference.
+
+    .. danger::
+        **Security Notice**: PyTorch model loading uses Python's pickle module,
+        which can execute code from the checkpoint file. This is a standard PyTorch behavior,
+        not specific to this library. For security:
+
+        - Load models only from trusted sources
+        - Consider using safer formats like ONNX or TorchScript for production use
+
+        To proceed with loading, set:
+
+        .. code-block:: bash
+
+            export TRUST_REMOTE_CODE=1
+
     Args:
         path (str | Path): Path to the PyTorch model weights file.
         device (str, optional): Device to use for inference.
@@ -65,6 +105,7 @@ class TorchInferencer:
     Raises:
         ValueError: If an invalid device is specified.
         ValueError: If the model file has an unknown extension.
+        ValueError: If TRUST_REMOTE_CODE environment variable is not set.
         KeyError: If the checkpoint file does not contain a model.
     """
 
@@ -73,6 +114,10 @@ def __init__(
         path: str | Path,
         device: str = "auto",
     ) -> None:
+        logger.warning(
+            "TorchInferencer is a legacy inferencer. Consider using Engine.predict() instead, "
+            "which provides a more modern and feature-rich interface for model inference.",
+        )
         self.device = self._get_device(device)
 
         # Load the model weights and metadata
@@ -118,6 +163,7 @@ def _load_checkpoint(self, path: str | Path) -> dict:
 
         Raises:
             ValueError: If the model file has an unknown extension.
+            ValueError: If TRUST_REMOTE_CODE environment variable is not set.
 
         Example:
             >>> model = TorchInferencer(path="path/to/model.pt")
@@ -132,6 +178,23 @@ def _load_checkpoint(self, path: str | Path) -> dict:
             msg = f"Unknown PyTorch checkpoint format {path.suffix}. Make sure you save the PyTorch model."
             raise ValueError(msg)
 
+        trust_remote_code_enabled = os.environ.get("TRUST_REMOTE_CODE", "0").lower() in {"1", "true"}
+
+        if not trust_remote_code_enabled:
+            msg = (
+                "Loading this model checkpoint requires executing arbitrary code via Python's pickle module, "
+                "which is disabled by default for security reasons. This can be exploited by malicious model files. "
+                "If you trust the source of this model and understand the risks, "
+                "set the environment variable `TRUST_REMOTE_CODE=1` to allow loading."
+            )
+            raise ValueError(msg)
+
+        logger.warning(
+            "TRUST_REMOTE_CODE is set to True. Loading model using pickle module, "
+            "which is inherently insecure and can lead to arbitrary code execution. "
+            "Only set this to True if you TRUST the source of the checkpoint.",
+        )
+        # nosemgrep: trailofbits.python.pickles-in-pytorch.pickles-in-pytorch
         return torch.load(path, map_location=self.device, weights_only=False)
 
     def load_model(self, path: str | Path) -> nn.Module:
@@ -184,10 +247,7 @@ def predict(self, image: str | Path | np.ndarray | PILImage | torch.Tensor) -> I
         image = self.pre_process(image)
         predictions = self.model(image)
 
-        return ImageBatch(
-            image=image,
-            **predictions._asdict(),
-        )
+        return ImageBatch(image=image, **predictions._asdict())
 
     def pre_process(self, image: torch.Tensor) -> torch.Tensor:
         """Pre-process the input image.
diff --git a/src/anomalib/models/__init__.py b/src/anomalib/models/__init__.py
@@ -52,7 +52,7 @@
 from omegaconf import DictConfig, OmegaConf
 
 from anomalib.models.components import AnomalibModule
-from anomalib.utils.path import convert_to_snake_case
+from anomalib.utils.path import convert_snake_to_pascal_case, convert_to_snake_case, convert_to_title_case
 
 from .image import (
     Cfa,
@@ -117,58 +117,63 @@ class UnknownModelError(ModuleNotFoundError):
 logger = logging.getLogger(__name__)
 
 
-def convert_snake_to_pascal_case(snake_case: str) -> str:
-    """Convert snake_case string to PascalCase.
+def list_models(case: str = "snake") -> set[str]:
+    """List available anomaly detection models.
 
-    This function takes a string in snake_case format (words separated by underscores)
-    and converts it to PascalCase format (each word capitalized and concatenated).
+    Returns a set of model names in the specified format that are available in the
+    anomalib library. This includes both image and video anomaly detection models.
 
     Args:
-        snake_case (str): Input string in snake_case format (e.g. ``"efficient_ad"``)
+        case (str): The format to return model names in. One of ``"snake"`` (e.g. "efficient_ad"),
+            ``"pascal"`` (the original class names, e.g. "EfficientAd") or ``"title"`` (e.g. "Efficient Ad").
+            Any other value raises ``ValueError``.
+            Defaults to ``"snake"``.
 
     Returns:
-        str: Output string in PascalCase format (e.g. ``"EfficientAd"``)
-
-    Examples:
-        >>> convert_snake_to_pascal_case("efficient_ad")
-        'EfficientAd'
-        >>> convert_snake_to_pascal_case("patchcore")
-        'Patchcore'
-        >>> convert_snake_to_pascal_case("reverse_distillation")
-        'ReverseDistillation'
-    """
-    return "".join(word.capitalize() for word in snake_case.split("_"))
-
-
-def get_available_models() -> set[str]:
-    """Get set of available anomaly detection models.
-
-    Returns a set of model names in snake_case format that are available in the
-    anomalib library. This includes both image and video anomaly detection models.
-
-    Returns:
-        set[str]: Set of available model names in snake_case format (e.g.
-            ``'efficient_ad'``, ``'padim'``, etc.)
+        set[str]: Set of available model names in the specified format.
 
     Example:
-        Get all available models:
+        Get all available models in different formats:
 
-        >>> from anomalib.models import get_available_models
-        >>> models = get_available_models()
+        >>> from anomalib.models import list_models
+        >>> # Get models in snake_case format
+        >>> models = list_models(case="snake")
         >>> print(sorted(list(models)))  # doctest: +NORMALIZE_WHITESPACE
         ['ai_vad', 'cfa', 'cflow', 'csflow', 'dfkde', 'dfm', 'draem',
          'efficient_ad', 'fastflow', 'fre', 'ganomaly', 'padim', 'patchcore',
          'reverse_distillation', 'stfpm', 'uflow', 'vlm_ad', 'winclip']
 
+        >>> # Get models in original PascalCase format
+        >>> models = list_models(case="pascal")
+        >>> print(sorted(list(models)))  # doctest: +NORMALIZE_WHITESPACE
+        ['AiVad', 'Cfa', 'Cflow', 'Csflow', 'Dfkde', 'Dfm', 'Draem',
+         'EfficientAd', 'Fastflow', 'Fre', 'Ganomaly', 'Padim', 'Patchcore',
+         'ReverseDistillation', 'Stfpm', 'Uflow', 'VlmAd', 'WinClip']
+
+        >>> # Get models in title case format
+        >>> models = list_models(case="title")
+        >>> print(sorted(list(models)))  # doctest: +NORMALIZE_WHITESPACE
+        ['Ai Vad', 'Cfa', 'Cflow', 'Csflow', 'Dfkde', 'Dfm', 'Draem',
+         'Efficient Ad', 'Fastflow', 'Fre', 'Ganomaly', 'Padim', 'Patchcore',
+         'Reverse Distillation', 'Stfpm', 'Uflow', 'Vlm Ad', 'Win Clip']
+
     Note:
         The returned model names can be used with :func:`get_model` to instantiate
         the corresponding model class.
     """
-    return {
-        convert_to_snake_case(cls.__name__)
-        for cls in AnomalibModule.__subclasses__()
-        if cls.__name__ != "AnomalyModule"
-    }
+    if case not in {"snake", "pascal", "title"}:
+        msg = f"Unsupported format: {case}. Must be one of: snake, pascal, title"
+        raise ValueError(msg)
+
+    models = {cls.__name__ for cls in AnomalibModule.__subclasses__() if cls.__name__ != "AnomalyModule"}
+
+    if case == "snake":
+        return {convert_to_snake_case(name) for name in models}
+
+    if case == "title":
+        return {convert_to_title_case(name) for name in models}
+
+    return models
 
 
 def _get_model_class_by_name(name: str) -> type[AnomalibModule]:
@@ -207,7 +212,7 @@ def _get_model_class_by_name(name: str) -> type[AnomalibModule]:
         if name == model.__name__.lower():
             model_class = model
     if model_class is None:
-        logger.exception(f"Could not find the model {name}. Available models are {get_available_models()}")
+        logger.exception(f"Could not find the model {name}. Available models are {list_models()}")
         raise UnknownModelError
 
     return model_class
@@ -288,7 +293,7 @@ def get_model(model: DictConfig | str | dict | Namespace, *args, **kwdargs) -> A
                 module = import_module("anomalib.models")
         except ModuleNotFoundError as exception:
             logger.exception(
-                f"Could not find the module {model.class_path}. Available models are {get_available_models()}",
+                f"Could not find the module {model.class_path}. Available models are {list_models()}",
             )
             raise UnknownModelError from exception
         try:
@@ -299,7 +304,7 @@ def get_model(model: DictConfig | str | dict | Namespace, *args, **kwdargs) -> A
             _model = model_class(*args, **init_args)
         except AttributeError as exception:
             logger.exception(
-                f"Could not find the model {model.class_path}. Available models are {get_available_models()}",
+                f"Could not find the model {model.class_path}. Available models are {list_models()}",
             )
             raise UnknownModelError from exception
     else:
diff --git a/src/anomalib/models/image/dsr/torch_model.py b/src/anomalib/models/image/dsr/torch_model.py
@@ -128,7 +128,7 @@ def load_pretrained_discrete_model_weights(self, ckpt: Path, device: torch.devic
             device (torch.device | str | None, optional): Device to load weights
                 to. Defaults to ``None``.
         """
-        self.discrete_latent_model.load_state_dict(torch.load(ckpt, map_location=device))
+        self.discrete_latent_model.load_state_dict(torch.load(ckpt, map_location=device, weights_only=True))
 
     def forward(
         self,
@@ -906,7 +906,7 @@ def __init__(self, num_embeddings: int, embedding_dim: int) -> None:
 
         # necessary to correctly load the checkpoint file
         self.register_buffer("_ema_cluster_size", torch.zeros(num_embeddings))
-        self._ema_w = nn.Parameter(torch.Tensor(num_embeddings, self._embedding_dim))
+        self._ema_w = nn.Parameter(torch.zeros(num_embeddings, self._embedding_dim))
         self._ema_w.data.normal_()
 
     @property
diff --git a/src/anomalib/models/image/efficient_ad/lightning_model.py b/src/anomalib/models/image/efficient_ad/lightning_model.py
@@ -167,7 +167,9 @@ def prepare_pretrained_model(self) -> None:
             pretrained_models_dir / "efficientad_pretrained_weights" / f"pretrained_teacher_{model_size_str}.pth"
         )
         logger.info(f"Load pretrained teacher model from {teacher_path}")
-        self.model.teacher.load_state_dict(torch.load(teacher_path, map_location=torch.device(self.device)))
+        self.model.teacher.load_state_dict(
+            torch.load(teacher_path, map_location=torch.device(self.device), weights_only=True),
+        )
 
     def prepare_imagenette_data(self, image_size: tuple[int, int] | torch.Size) -> None:
         """Prepare ImageNette dataset transformations.
diff --git a/src/anomalib/utils/path.py b/src/anomalib/utils/path.py
@@ -168,6 +168,29 @@ def convert_to_snake_case(s: str) -> str:
     return re.sub(r"__+", "_", s)
 
 
+def convert_snake_to_pascal_case(snake_case: str) -> str:
+    """Convert snake_case string to PascalCase.
+
+    This function takes a string in snake_case format (words separated by underscores)
+    and converts it to PascalCase format (each word capitalized and concatenated).
+
+    Args:
+        snake_case (str): Input string in snake_case format (e.g. ``"efficient_ad"``)
+
+    Returns:
+        str: Output string in PascalCase format (e.g. ``"EfficientAd"``)
+
+    Examples:
+        >>> convert_snake_to_pascal_case("efficient_ad")
+        'EfficientAd'
+        >>> convert_snake_to_pascal_case("patchcore")
+        'Patchcore'
+        >>> convert_snake_to_pascal_case("reverse_distillation")
+        'ReverseDistillation'
+    """
+    return "".join(word.capitalize() for word in snake_case.split("_"))
+
+
 def convert_to_title_case(text: str) -> str:
     """Convert text to title case, handling various text formats.
 
diff --git a/tests/integration/model/test_models.py b/tests/integration/model/test_models.py
@@ -17,12 +17,12 @@
 from anomalib.data import AnomalibDataModule, MVTecAD
 from anomalib.deploy import ExportType
 from anomalib.engine import Engine
-from anomalib.models import AnomalibModule, get_available_models, get_model
+from anomalib.models import AnomalibModule, get_model, list_models
 
 
 def models() -> set[str]:
     """Return all available models."""
-    return get_available_models()
+    return list_models()
 
 
 def export_types() -> list[ExportType]:
diff --git a/tests/integration/tools/test_gradio_entrypoint.py b/tests/integration/tools/test_gradio_entrypoint.py
@@ -38,8 +38,12 @@ def get_functions() -> tuple[Callable, Callable]:
     def test_torch_inference(
         get_functions: tuple[Callable, Callable],
         ckpt_path: Callable[[str], Path],
+        monkeypatch: pytest.MonkeyPatch,
     ) -> None:
         """Test gradio_inference.py."""
+        # Set TRUST_REMOTE_CODE environment variable for the test
+        monkeypatch.setenv("TRUST_REMOTE_CODE", "1")
+
         _ckpt_path = ckpt_path("Padim")
         parser, inferencer = get_functions
         model = Padim.load_from_checkpoint(_ckpt_path)
diff --git a/tests/integration/tools/test_torch_entrypoint.py b/tests/integration/tools/test_torch_entrypoint.py
@@ -35,8 +35,12 @@ def test_torch_inference(
         project_path: Path,
         ckpt_path: Callable[[str], Path],
         get_dummy_inference_image: str,
+        monkeypatch: pytest.MonkeyPatch,
     ) -> None:
         """Test torch_inference.py."""
+        # Set TRUST_REMOTE_CODE environment variable for the test
+        monkeypatch.setenv("TRUST_REMOTE_CODE", "1")
+
         _ckpt_path = ckpt_path("Padim")
         get_parser, infer = get_functions
         model = Padim.load_from_checkpoint(_ckpt_path)
diff --git a/tests/unit/deploy/test_inferencer.py b/tests/unit/deploy/test_inferencer.py