
Commit b90fb9f

added first draft of tabular datamodule
Signed-off-by: Manuel Konrad <84141230+manuelkonrad@users.noreply.github.com>
1 parent e6da810 commit b90fb9f

13 files changed (+671, -2 lines)

docs/source/markdown/guides/reference/data/datamodules/image.md

Lines changed: 7 additions & 0 deletions
@@ -28,6 +28,13 @@ Dataset format compatible with Intel Geti™.
 Custom folder-based dataset organization.
 :::

+:::{grid-item-card} Tabular
+:link: anomalib.data.datamodules.image.Tabular
+:link-type: doc
+
+Custom tabular dataset.
+:::
+
 :::{grid-item-card} Kolektor
 :link: anomalib.data.datamodules.image.Kolektor
 :link-type: doc

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+# Tabular Datamodule
+
+```{eval-rst}
+.. automodule:: anomalib.data.datamodules.image.tabular
+   :members:
+   :show-inheritance:
+```

examples/configs/data/tabular.yaml

Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
+class_path: anomalib.data.Tabular
+init_args:
+  name: bottle
+  root: "datasets/MVTecAD/bottle"
+  train_batch_size: 32
+  eval_batch_size: 32
+  num_workers: 8
+  test_split_mode: from_dir
+  test_split_ratio: 0.2
+  val_split_mode: same_as_test
+  val_split_ratio: 0.5
+  seed: null
+  samples:
+    - image_path: train/good/000.png
+      label_index: 0
+      mask_path: ""
+      split: train
+    - image_path: train/good/001.png
+      label_index: 0
+      mask_path: ""
+      split: train
+    - image_path: train/good/002.png
+      label_index: 0
+      mask_path: ""
+      split: train
+    - image_path: train/good/003.png
+      label_index: 0
+      mask_path: ""
+      split: train
+    - image_path: train/good/004.png
+      label_index: 0
+      mask_path: ""
+      split: train
+    - image_path: test/broken_large/000.png
+      label_index: 1
+      mask_path: ground_truth/broken_large/000_mask.png
+      split: test
+    - image_path: test/broken_large/002.png
+      label_index: 1
+      mask_path: ground_truth/broken_large/002_mask.png
+      split: test
+    - image_path: test/broken_large/004.png
+      label_index: 1
+      mask_path: ground_truth/broken_large/004_mask.png
+      split: test
+    - image_path: test/good/000.png
+      label_index: 0
+      mask_path: ""
+      split: test
+    - image_path: test/good/001.png
+      label_index: 0
+      mask_path: ""
+      split: test
+    - image_path: test/good/003.png
+      label_index: 0
+      mask_path: ""
+      split: test
+    - image_path: test/broken_large/001.png
+      label_index: 1
+      mask_path: ground_truth/broken_large/001_mask.png
+      split: test
+    - image_path: test/broken_large/003.png
+      label_index: 1
+      mask_path: ground_truth/broken_large/003_mask.png
+      split: test
+    - image_path: test/good/002.png
+      label_index: 0
+      mask_path: ""
+      split: test
+    - image_path: test/good/004.png
+      label_index: 0
+      mask_path: ""
+      split: test
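
For reference, a config in this class_path/init_args form would typically be consumed through anomalib's get_datamodule helper, whose signature appears in the src/anomalib/data/__init__.py hunks below. A minimal sketch, assuming get_datamodule is importable from anomalib.data, OmegaConf is installed, and the referenced MVTec AD bottle images actually exist under the configured root:

    from omegaconf import OmegaConf
    from anomalib.data import get_datamodule

    # Parse the YAML above and let anomalib instantiate anomalib.data.Tabular
    # with the listed init_args (including the inline samples table).
    config = OmegaConf.load("examples/configs/data/tabular.yaml")
    datamodule = get_datamodule(config)
    datamodule.setup()
    batch = next(iter(datamodule.train_dataloader()))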

src/anomalib/data/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -61,6 +61,7 @@
     MVTecAD2,
     MVTecLOCO,
     RealIAD,
+    Tabular,
     Visa,
 )
 from .datamodules.video import Avenue, ShanghaiTech, UCSDped, VideoDataFormat
@@ -75,6 +76,7 @@
     KolektorDataset,
     MVTecADDataset,
     MVTecLOCODataset,
+    TabularDataset,
     VADDataset,
     VisaDataset,
 )
@@ -177,6 +179,7 @@ def get_datamodule(config: DictConfig | ListConfig | dict) -> AnomalibDataModule
     "MVTecAD2",
     "MVTecLOCO",
     "RealIAD",
+    "Tabular",
     "VAD",
     "Visa",
     # Video Data Modules
@@ -192,6 +195,7 @@ def get_datamodule(config: DictConfig | ListConfig | dict) -> AnomalibDataModule
     "KolektorDataset",
     "MVTecADDataset",
     "MVTecLOCODataset",
+    "TabularDataset",
     "VADDataset",
     "VisaDataset",
     "AvenueDataset",

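With these exports in place, both the new datamodule and its underlying dataset become importable directly from anomalib.data. A minimal sketch of constructing the dataset by hand, mirroring the TabularDataset call made in the _setup method of the new tabular.py further down; the sample paths are illustrative, and the dict-style samples argument is assumed to be accepted since the datamodule forwards its own samples argument unchanged:

    from anomalib.data import TabularDataset
    from anomalib.data.utils import Split

    # Illustrative samples table; paths are placeholders relative to `root`.
    samples = {
        "image_path": ["train/good/000.png", "test/broken_large/000.png"],
        "label_index": [0, 1],
        "mask_path": ["", "ground_truth/broken_large/000_mask.png"],
        "split": ["train", "test"],
    }
    train_dataset = TabularDataset(
        name="bottle",
        samples=samples,
        split=Split.TRAIN,
        root="datasets/MVTecAD/bottle",
    )
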
src/anomalib/data/datamodules/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -4,7 +4,7 @@
 # SPDX-License-Identifier: Apache-2.0

 from .depth import Folder3D, MVTec3D
-from .image import VAD, BTech, Datumaro, Folder, Kolektor, MVTec, MVTecAD, Visa
+from .image import VAD, BTech, Datumaro, Folder, Kolektor, MVTec, MVTecAD, Tabular, Visa
 from .video import Avenue, ShanghaiTech, UCSDped

 __all__ = [
@@ -16,6 +16,7 @@
     "Kolektor",
     "MVTec", # Include MVTec for backward compatibility
     "MVTecAD",
+    "Tabular",
     "VAD",
     "Visa",
     "Avenue",

src/anomalib/data/datamodules/image/__init__.py

Lines changed: 5 additions & 0 deletions
@@ -10,6 +10,7 @@
 - ``MVTecAD``: MVTec Anomaly Detection Dataset
 - ``MVTecAD2``: MVTec Anomaly Detection Dataset 2
 - ``MVTecLOCO``: MVTec LOCO Dataset with logical and structural anomalies
+- ``Tabular``: Custom tabular dataset with image paths and labels
 - ``VAD``: Valeo Anomaly Detection Dataset
 - ``Visa``: Visual Anomaly Dataset

@@ -36,6 +37,7 @@
 from .mvtecad import MVTec, MVTecAD
 from .mvtecad2 import MVTecAD2
 from .realiad import RealIAD
+from .tabular import Tabular
 from .vad import VAD
 from .visa import Visa

@@ -54,6 +56,7 @@ class ImageDataFormat(str, Enum):
     - ``MVTEC_AD_2``: MVTec AD 2 Dataset
     - ``MVTEC_3D``: MVTec 3D AD Dataset
     - ``MVTEC_LOCO``: MVTec LOCO Dataset
+    - ``TABULAR``: Custom Tabular Dataset
     - ``REALIAD``: Real-IAD Dataset
     - ``VAD``: Valeo Anomaly Detection Dataset
     - ``VISA``: Visual Anomaly Dataset
@@ -69,6 +72,7 @@ class ImageDataFormat(str, Enum):
     MVTEC_3D = "mvtec_3d"
     MVTEC_LOCO = "mvtec_loco"
     REAL_IAD = "realiad"
+    TABULAR = "tabular"
     VAD = "vad"
     VISA = "visa"

@@ -83,6 +87,7 @@ class ImageDataFormat(str, Enum):
     "MVTecAD2",
     "MVTecLOCO",
     "RealIAD",
+    "Tabular",
     "VAD",
     "Visa",
 ]
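
The new enum member makes the tabular format discoverable like any other image data format. A tiny sanity check, assuming ImageDataFormat is imported from the package shown above (it is defined in this __init__.py):

    from anomalib.data.datamodules.image import ImageDataFormat

    # Value-based Enum lookup resolves the member added above.
    assert ImageDataFormat("tabular") is ImageDataFormat.TABULAR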

src/anomalib/data/datamodules/image/tabular.py

Lines changed: 195 additions & 0 deletions
@@ -0,0 +1,195 @@
+"""Custom Tabular Data Module.
+
+This script creates a custom Lightning DataModule from a table or tabular file
+containing image paths and labels.
+
+Example:
+    Create a Tabular datamodule::
+
+        >>> from anomalib.data import Tabular
+        >>> samples = {
+        ...     "image_path": ["images/image1.png", "images/image2.png", "images/image3.png", ... ],
+        ...     "label_index": [LabelName.NORMAL, LabelName.NORMAL, LabelName.ABNORMAL, ... ],
+        ...     "split": [Split.TRAIN, Split.TRAIN, Split.TEST, ... ],
+        ... }
+        >>> datamodule = Tabular(
+        ...     name="custom",
+        ...     samples=samples,
+        ...     root="./datasets/custom",
+        ... )
+"""
+
+# Copyright (C) 2025 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from pathlib import Path
+from typing import IO
+
+import pandas as pd
+from torchvision.transforms.v2 import Transform
+
+from anomalib.data.datamodules.base.image import AnomalibDataModule
+from anomalib.data.datasets.image.tabular import TabularDataset
+from anomalib.data.utils import Split, TestSplitMode, ValSplitMode
+
+
+class Tabular(AnomalibDataModule):
+    """Tabular DataModule.
+
+    Args:
+        name (str): Name of the dataset. Used for logging/saving.
+        samples (dict | list | DataFrame): Pandas ``DataFrame`` or compatible ``list``
+            or ``dict`` containing the dataset information.
+        root (str | Path | None): Root folder containing normal and abnormal
+            directories. Defaults to ``None``.
+        normal_split_ratio (float): Ratio to split normal training images for
+            test set when no normal test images exist.
+            Defaults to ``0.2``.
+        train_batch_size (int): Training batch size.
+            Defaults to ``32``.
+        eval_batch_size (int): Validation/test batch size.
+            Defaults to ``32``.
+        num_workers (int): Number of workers for data loading.
+            Defaults to ``8``.
+        train_augmentations (Transform | None): Augmentations to apply to the training images.
+            Defaults to ``None``.
+        val_augmentations (Transform | None): Augmentations to apply to the validation images.
+            Defaults to ``None``.
+        test_augmentations (Transform | None): Augmentations to apply to the test images.
+            Defaults to ``None``.
+        augmentations (Transform | None): General augmentations to apply if stage-specific
+            augmentations are not provided.
+        test_split_mode (TestSplitMode): Method to obtain test subset.
+            Defaults to ``TestSplitMode.FROM_DIR``.
+        test_split_ratio (float): Fraction of train images for testing.
+            Defaults to ``0.2``.
+        val_split_mode (ValSplitMode): Method to obtain validation subset.
+            Defaults to ``ValSplitMode.FROM_TEST``.
+        val_split_ratio (float): Fraction of images for validation.
+            Defaults to ``0.5``.
+        seed (int | None): Random seed for splitting.
+            Defaults to ``None``.
+
+    Example:
+        Create and setup a tabular datamodule::
+
+            >>> from anomalib.data import Tabular
+            >>> samples = {
+            ...     "image_path": ["images/image1.png", "images/image2.png", "images/image3.png", ... ],
+            ...     "label_index": [LabelName.NORMAL, LabelName.NORMAL, LabelName.ABNORMAL, ... ],
+            ...     "split": [Split.TRAIN, Split.TRAIN, Split.TEST, ... ],
+            ... }
+            >>> datamodule = Tabular(
+            ...     name="custom",
+            ...     samples=samples,
+            ...     root="./datasets/custom",
+            ... )
+            >>> datamodule.setup()
+
+        Get a batch from train dataloader::
+
+            >>> batch = next(iter(datamodule.train_dataloader()))
+            >>> batch.keys()
+            dict_keys(['image', 'label', 'mask', 'image_path', 'mask_path'])
+
+        Get a batch from test dataloader::
+
+            >>> batch = next(iter(datamodule.test_dataloader()))
+            >>> batch.keys()
+            dict_keys(['image', 'label', 'mask', 'image_path', 'mask_path'])
+    """
+
+    def __init__(
+        self,
+        name: str,
+        samples: dict | list | pd.DataFrame,
+        root: str | Path | None = None,
+        normal_split_ratio: float = 0.2,
+        train_batch_size: int = 32,
+        eval_batch_size: int = 32,
+        num_workers: int = 8,
+        train_augmentations: Transform | None = None,
+        val_augmentations: Transform | None = None,
+        test_augmentations: Transform | None = None,
+        augmentations: Transform | None = None,
+        test_split_mode: TestSplitMode | str = TestSplitMode.FROM_DIR,
+        test_split_ratio: float = 0.2,
+        val_split_mode: ValSplitMode | str = ValSplitMode.FROM_TEST,
+        val_split_ratio: float = 0.5,
+        seed: int | None = None,
+    ) -> None:
+        self._name = name
+        self.root = root
+        self._unprocessed_samples = samples
+        test_split_mode = TestSplitMode(test_split_mode)
+        val_split_mode = ValSplitMode(val_split_mode)
+        super().__init__(
+            train_batch_size=train_batch_size,
+            eval_batch_size=eval_batch_size,
+            num_workers=num_workers,
+            train_augmentations=train_augmentations,
+            val_augmentations=val_augmentations,
+            test_augmentations=test_augmentations,
+            augmentations=augmentations,
+            test_split_mode=test_split_mode,
+            test_split_ratio=test_split_ratio,
+            val_split_mode=val_split_mode,
+            val_split_ratio=val_split_ratio,
+            seed=seed,
+        )
+
+        self.normal_split_ratio = normal_split_ratio
+
+    def _setup(self, _stage: str | None = None) -> None:
+        self.train_data = TabularDataset(
+            name=self.name,
+            samples=self._unprocessed_samples,
+            split=Split.TRAIN,
+            root=self.root,
+        )
+
+        self.test_data = TabularDataset(
+            name=self.name,
+            samples=self._unprocessed_samples,
+            split=Split.TEST,
+            root=self.root,
+        )
+
+    @property
+    def name(self) -> str:
+        """Get name of the datamodule.
+
+        Returns:
+            Name of the datamodule.
+        """
+        return self._name
+
+    @classmethod
+    def from_file(
+        cls: type["Tabular"],
+        name: str,
+        file_path: str | Path | IO[str] | IO[bytes],
+        file_format: str = "csv",
+        pd_kwargs: dict | None = None,
+        **kwargs,
+    ) -> "Tabular":
+        """Create Tabular Datamodule from file.
+
+        Args:
+            name (str): Name of the dataset. This is used to name the datamodule,
+                especially when logging/saving.
+            file_path (str | Path | file-like): Path or file-like object to tabular
+                file containing the dataset information.
+            file_format (str): File format supported by a pd.read_* method, such
+                as ``csv``, ``parquet`` or ``json``.
+                Defaults to ``csv``.
+            pd_kwargs (dict | None): Keyword argument dictionary for the pd.read_* method.
+                Defaults to ``None``.
+            kwargs (dict): Additional keyword arguments for the Tabular Datamodule class.
+
+        Returns:
+            Tabular: Tabular Datamodule
+        """
+        pd_kwargs = pd_kwargs or {}
+        samples = getattr(pd, f"read_{file_format}")(file_path, **pd_kwargs)
+        return cls(name, samples, **kwargs)
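
To tie the pieces together, here is a minimal sketch of the from_file constructor defined above, driven by a small CSV written on the fly. The column names mirror the examples/configs/data/tabular.yaml table; the samples.csv file name and the image paths are illustrative, and actually iterating the dataloaders requires those images to exist under root:

    import pandas as pd
    from anomalib.data import Tabular

    # Write a tiny samples table to disk (paths are relative to `root`).
    samples = pd.DataFrame({
        "image_path": ["train/good/000.png", "test/broken_large/000.png"],
        "label_index": [0, 1],
        "mask_path": ["", "ground_truth/broken_large/000_mask.png"],
        "split": ["train", "test"],
    })
    samples.to_csv("samples.csv", index=False)

    # from_file dispatches to pd.read_csv here and forwards the resulting
    # DataFrame plus the remaining kwargs to the Tabular constructor.
    datamodule = Tabular.from_file(
        name="bottle",
        file_path="samples.csv",
        file_format="csv",
        pd_kwargs={"keep_default_na": False},  # keep empty mask_path as "" rather than NaN
        root="datasets/MVTecAD/bottle",
    )
    datamodule.setup()
    batch = next(iter(datamodule.test_dataloader()))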
