diff --git a/.gitignore b/.gitignore
index 23ecf4a..c045983 100644
--- a/.gitignore
+++ b/.gitignore
@@ -162,4 +162,5 @@
 cython_debug/
 **/checkpoints/*
 .vscode/
-models/
\ No newline at end of file
+models/
+data/*/
\ No newline at end of file
diff --git a/benchmark/pyod_.py b/benchmark/pyod_.py
index 0f75759..3a54228 100644
--- a/benchmark/pyod_.py
+++ b/benchmark/pyod_.py
@@ -23,10 +23,6 @@
                          KNN, LMDD, LOF, MCD, OCSVM, PCA,
                          FeatureBagging, IForest)
 
-# TODO: add sklearnex to accelerate sklearn
-# from sklearnex import patch_sklearn
-# patch_sklearn()
-
 warnings.filterwarnings("ignore")
diff --git a/docs/Makefile b/docs/Makefile
index 70e71f9..d4bb2cb 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -6,7 +6,7 @@
 SPHINXOPTS    ?=
 SPHINXBUILD   ?= sphinx-build
 SOURCEDIR     = .
-BUILDDIR      = .. #_build
+BUILDDIR      = _build
 
 # Put it first so that "make" without argument is like "make help".
 help:
diff --git a/docs/_static/custom.css b/docs/_static/custom.css
index 33a972c..544c81f 100644
--- a/docs/_static/custom.css
+++ b/docs/_static/custom.css
@@ -3,4 +3,11 @@
     /* or any size you want */
     height: auto;
     /* keep aspect ratio */
+}
+
+.wy-nav-content {
+    padding: 1.618em 3.236em;
+    height: 100%;
+    max-width: 1600px;
+    margin: auto;
 }
\ No newline at end of file
diff --git a/docs/_static/flowbench.png b/docs/_static/flowbench.png
new file mode 100644
index 0000000..3746ba7
Binary files /dev/null and b/docs/_static/flowbench.png differ
diff --git a/docs/conf.py b/docs/conf.py
index bc4fb22..5f704e0 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -18,6 +18,7 @@
     'sphinx.ext.autodoc',
     'sphinx.ext.napoleon',
     'sphinx.ext.mathjax',
+    'sphinx.ext.doctest',
 ]
 
 templates_path = ['_templates']
diff --git a/docs/examples.rst b/docs/examples.rst
index ce68152..8681559 100644
--- a/docs/examples.rst
+++ b/docs/examples.rst
@@ -1,3 +1,159 @@
 Examples
 ========
 
+Load Dataset
+------------
+
+- Load data as graphs in ``pytorch_geometric`` format:
+
+  .. code-block:: python
+
+      from flowbench.dataset import FlowDataset
+      dataset = FlowDataset(root="./", name="montage")
+      data = dataset[0]
+
+  The ``data`` object exposes the graph structure via ``data.edge_index`` and the node features via ``data.x``.
+
+- Load data as tabular data in ``pytorch`` format:
+
+  .. code-block:: python
+
+      from flowbench.dataset import FlowDataset
+      dataset = FlowDataset(root="./", name="montage")
+      data = dataset[0]
+      Xs = data.x
+      ys = data.y
+
+  Unlike the graph ``pyg.data`` usage above, only the node features and labels are used here.
+
+- Load data as tabular data in ``numpy`` format:
+
+  .. code-block:: python
+
+      from flowbench.dataset import FlowDataset
+      dataset = FlowDataset(root="./", name="montage")
+      data = dataset[0]
+      Xs = data.x.numpy()
+      ys = data.y.numpy()
+
+  This is the same as the previous example, except that the data is in ``numpy`` format, which is what models from ``sklearn`` and ``xgboost`` typically expect.
+
+- Load text data with the ``huggingface`` interface.
+  We have uploaded our parsed text data to the ``huggingface`` hub. You can load it with the following code:
+
+  .. code-block:: python
+
+      from datasets import load_dataset
+      dataset = load_dataset("cshjin/poseidon", "1000genome")
+
+  The dataset is a ``dict`` with keys ``train``, ``test``, and ``validation``.
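+
+  A quick way to inspect the loaded splits (a sketch, assuming the standard ``datasets.DatasetDict`` interface returned by ``load_dataset``):
+
+  .. code-block:: python
+
+      train_split = dataset["train"]
+      print(train_split.num_rows)      # number of parsed samples in the split
+      print(train_split.column_names)  # fields available per sample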
+
+PyOD Models
+-----------
+
+=================== ================ ====================================================================================================== ===== ===================================================
+Type                Abbr             Algorithm                                                                                              Year  Class
+=================== ================ ====================================================================================================== ===== ===================================================
+Probabilistic       ABOD             Angle-Based Outlier Detection                                                                          2008  :class:`flowbench.unsupervised.pyod.ABOD`
+Probabilistic       KDE              Outlier Detection with Kernel Density Functions                                                        2007  :class:`flowbench.unsupervised.pyod.KDE`
+Probabilistic       GMM              Probabilistic Mixture Modeling for Outlier Analysis                                                          :class:`flowbench.unsupervised.pyod.GMM`
+Linear Model        PCA              Principal Component Analysis (the sum of weighted projected distances to the eigenvector hyperplanes)  2003  :class:`flowbench.unsupervised.pyod.PCA`
+Linear Model        OCSVM            One-Class Support Vector Machines                                                                      2001  :class:`flowbench.unsupervised.pyod.OCSVM`
+Linear Model        LMDD             Deviation-based Outlier Detection (LMDD)                                                               1996  :class:`flowbench.unsupervised.pyod.LMDD`
+Proximity-Based     LOF              Local Outlier Factor                                                                                   2000  :class:`flowbench.unsupervised.pyod.LOF`
+Proximity-Based     CBLOF            Clustering-Based Local Outlier Factor                                                                  2003  :class:`flowbench.unsupervised.pyod.CBLOF`
+Proximity-Based     kNN              k Nearest Neighbors (use the distance to the kth nearest neighbor as the outlier score)                2000  :class:`flowbench.unsupervised.pyod.KNN`
+Outlier Ensembles   IForest          Isolation Forest                                                                                       2008  :class:`flowbench.unsupervised.pyod.IForest`
+Outlier Ensembles   INNE             Isolation-based Anomaly Detection Using Nearest-Neighbor Ensembles                                     2018  :class:`flowbench.unsupervised.pyod.INNE`
+Outlier Ensembles   LSCP             LSCP: Locally Selective Combination of Parallel Outlier Ensembles                                      2019  :class:`flowbench.unsupervised.pyod.LSCP`
+=================== ================ ====================================================================================================== ===== ===================================================
+
+- Example of using `GMM` (reading the outlier scores is sketched below):
+
+  .. code-block:: python
+
+      from flowbench.unsupervised.pyod import GMM
+      from flowbench.dataset import FlowDataset
+      dataset = FlowDataset(root="./", name="1000genome")
+      data = dataset[0]
+      Xs = data.x.numpy()
+      clf = GMM()
+      clf.fit(Xs)
+      y_pred = clf.predict(Xs)
+
+  - Detailed example in ``example/demo_pyod.py``
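+
+  Since these classes wrap the ``pyod`` detectors, the fitted model should also expose the raw outlier scores (a sketch, assuming the wrappers inherit the standard ``pyod`` ``BaseDetector`` attributes):
+
+  .. code-block:: python
+
+      # raw outlier scores on the training data (higher = more anomalous)
+      scores = clf.decision_scores_
+      # outlier scores for (new) samples
+      new_scores = clf.decision_function(Xs)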
+
+PyGOD Models
+------------
+
+=========== ================== ===== ==============================================
+Type        Abbr               Year  Class
+=========== ================== ===== ==============================================
+Clustering  SCAN               2007  :class:`flowbench.unsupervised.pygod.SCAN`
+GNN+AE      GAE                2016  :class:`flowbench.unsupervised.pygod.GAE`
+MF          Radar              2017  :class:`flowbench.unsupervised.pygod.Radar`
+MF          ANOMALOUS          2018  :class:`flowbench.unsupervised.pygod.ANOMALOUS`
+MF          ONE                2019  :class:`flowbench.unsupervised.pygod.ONE`
+GNN+AE      DOMINANT           2019  :class:`flowbench.unsupervised.pygod.DOMINANT`
+MLP+AE      DONE               2020  :class:`flowbench.unsupervised.pygod.DONE`
+MLP+AE      AdONE              2020  :class:`flowbench.unsupervised.pygod.AdONE`
+GNN+AE      AnomalyDAE         2020  :class:`flowbench.unsupervised.pygod.AnomalyDAE`
+GAN         GAAN               2020  :class:`flowbench.unsupervised.pygod.GAAN`
+GNN+AE      DMGD               2020  :class:`flowbench.unsupervised.pygod.DMGD`
+GNN         OCGNN              2021  :class:`flowbench.unsupervised.pygod.OCGNN`
+GNN+AE+SSL  CoLA               2021  :class:`flowbench.unsupervised.pygod.CoLA`
+GNN+AE      GUIDE              2021  :class:`flowbench.unsupervised.pygod.GUIDE`
+GNN+AE+SSL  CONAD              2022  :class:`flowbench.unsupervised.pygod.CONAD`
+GNN+AE      GADNR              2024  :class:`flowbench.unsupervised.pygod.GADNR`
+=========== ================== ===== ==============================================
+
+- Example of using `GAE`:
+
+  .. code-block:: python
+
+      from flowbench.unsupervised.pygod import GAE
+      from flowbench.dataset import FlowDataset
+      dataset = FlowDataset(root="./", name="1000genome")
+      data = dataset[0]
+      clf = GAE()
+      clf.fit(data)
+
+  - Detailed example in ``example/demo_pygod.py``
+
+
+Supervised Models
+-----------------
+
+- Example of using `MLP`:
+
+  .. code-block:: python
+
+      from flowbench.supervised.mlp import MLPClassifier
+      from flowbench.dataset import FlowDataset
+      dataset = FlowDataset(root="./", name="1000genome")
+      data = dataset[0]
+      clf = MLPClassifier()
+      clf.fit(data)
+
+  - Detailed example in ``example/demo_supervised.py``
+
+Supervised fine-tuned LLMs
+--------------------------
+
+- Example of using LoRA (Low-Rank Adaptation) for supervised fine-tuning of LLMs (the construction of ``peft_model`` is sketched below):
+
+  .. code-block:: python
+
+      from datasets import load_dataset
+      from peft import LoraConfig, TaskType
+      from transformers import Trainer, TrainingArguments
+
+      dataset = load_dataset("cshjin/poseidon", "1000genome")
+      # data processing
+      ...
+      # LoRA config
+      peft_config = LoraConfig(task_type=TaskType.SEQ_CLS,
+                               inference_mode=False,
+                               r=8, lora_alpha=32, lora_dropout=0.1)
+      training_args = TrainingArguments(...)
+      # LoRA trainer
+      trainer = Trainer(peft_model, ...)
+      trainer.train()
+      ...
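+
+  The snippet above elides how ``peft_model`` is built. A minimal sketch, assuming a BERT-style sequence-classification backbone and the standard ``peft`` API (the model name is illustrative):
+
+  .. code-block:: python
+
+      from peft import get_peft_model
+      from transformers import AutoModelForSequenceClassification
+
+      # wrap the base model with the LoRA adapters defined in peft_config
+      base_model = AutoModelForSequenceClassification.from_pretrained(
+          "bert-base-uncased", num_labels=2)
+      peft_model = get_peft_model(base_model, peft_config)
+      peft_model.print_trainable_parameters()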
+
+  - Detailed example in ``example/demo_sft_lora.py``
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index 030f15e..dc8264a 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -10,7 +10,13 @@
 Flow-Bench is a benchmark dataset for anomaly detection techniques in computational workflows.
 Flow-Bench contains workflow execution traces, executed on distributed infrastructure, that include systematically injected anomalies (labeled), and offers both the raw execution logs and a more compact parsed version.
 In this GitHub repository, apart from the logs and traces, you will find sample code to load and process the parsed data using pytorch, as well as, the code used to parse the raw logs and events.
 
+.. figure:: _static/flowbench.png
+   :alt: FlowBench Outline
+   :align: center
+   :scale: 50%
+
+   Figure: FlowBench - An Anomaly Detection Benchmark Dataset
+
 .. toctree::
    :maxdepth: 2
    :caption: Contents:
@@ -25,6 +31,7 @@
    flowbench.nlp
 
    license
+
 
 Indices and tables
 ==================
diff --git a/flowbench/supervised/gnn.py b/flowbench/supervised/gnn.py
index f0d7db4..845c753 100644
--- a/flowbench/supervised/gnn.py
+++ b/flowbench/supervised/gnn.py
@@ -86,92 +86,92 @@ def configure_optimizers(self):
         return torch.optim.Adam(self.parameters(), lr=self.lr)
 
 
-class GNN_v2(L.LightningModule):
-    r""" GNN model for node classification
-    NOTE: the version used in WORKS'22 paper
-    """
-
-    def __init__(self, num_features, num_classes, **kwargs):
-        super(GNN_v2, self).__init__()
-        self.hidden_dim = kwargs.get('hidden_dim', 128)
-        self.num_layers = kwargs.get('num_layers', 3)
-        self.lr = kwargs.get('lr', 1e-4)
-        self.dropout = kwargs.get('dropout', 0.5)
-
-        # add the ability to add one or more conv layers
-        conv_blocks = [
-            GCNConv(num_features, self.hidden_dim),
-            ReLU(),
-        ]
-
-        # ability to add one or more conv blocks
-        for _ in range(self.num_layers - 1):
-            conv_blocks += [
-                GCNConv(self.hidden_dim, self.hidden_dim),
-                ReLU(),
-                GCNConv(self.hidden_dim, self.hidden_dim),
-                ReLU(),
-            ]
-
-        # group all the conv layers
-        self.conv_layers = ModuleList(conv_blocks)
-
-        # add the linear layers for flattening the output from MPNN
-        self.flatten = Sequential(
-            Linear(self.hidden_dim, self.hidden_dim),
-            ReLU(),
-            Linear(self.hidden_dim, num_classes))
-
-        self.acc = torchmetrics.Accuracy(task='binary')
-        self.auroc = torchmetrics.AUROC(task='binary')
-
-    def forward(self, data):
-        # process the layers
-        x, edge_index = data.x, data.edge_index
-        for idx, layer in enumerate(self.conv_layers):
-            if isinstance(layer, GCNConv):
-                x = layer(x, edge_index)
-            else:
-                x = layer(x)
-            x = F.dropout(x, p=self.dropout, training=self.training)
-        # pass the output to the linear output layer
-        out = self.flatten(x)
-
-        # return the output
-        return F.log_softmax(out, dim=1)
-
-    def training_step(self, batch, batch_idx):
-        x = self.forward(batch)
-        loss = torch.nn.functional.cross_entropy(x, batch.y)
-        # self.log('train_loss', loss)
-        # acc = self.acc(x.argmax(dim=1), batch.y)
-        # self.log('train_acc', acc, on_epoch=False, prog_bar=False, on_step=False)
-        # auc = self.auroc(x.argmax(dim=1), batch.y)
-        # self.log('train_auc', auc, on_epoch=False)
-        return loss
-
-    def validation_step(self, batch, batch_idx):
-        x = self.forward(batch)
-        loss = torch.nn.functional.cross_entropy(x, batch.y)
-        self.log('val_loss', loss)
-        acc = self.acc(x.argmax(dim=1), batch.y)
-        self.log('val_acc', acc, on_epoch=True, prog_bar=True, on_step=False)
-        auc = self.auroc(x.argmax(dim=1), batch.y)
-        self.log('val_auc', auc, on_epoch=True)
-        return loss
-
-    def test_step(self, batch, batch_idx):
-        x = self.forward(batch)
-        loss = torch.nn.functional.cross_entropy(x, batch.y)
-        self.log('test_loss', loss)
-        acc = self.acc(x.argmax(dim=1), batch.y)
-        self.log('test_acc', acc, on_epoch=True)
-        auc = self.auroc(x.argmax(dim=1), batch.y)
-        self.log('test_auc', auc, on_epoch=True)
-        return loss
-
-    def configure_optimizers(self):
-        return torch.optim.Adam(self.parameters(), lr=self.lr)
+# class GNN_v2(L.LightningModule):
+#     r""" GNN model for node classification
+#     NOTE: the version used in WORKS'22 paper
+#     """
+
+#     def __init__(self, num_features, num_classes, **kwargs):
+#         super(GNN_v2, self).__init__()
+#         self.hidden_dim = kwargs.get('hidden_dim', 128)
+#         self.num_layers = kwargs.get('num_layers', 3)
+#         self.lr = kwargs.get('lr', 1e-4)
+#         self.dropout = kwargs.get('dropout', 0.5)
+
+#         # add the ability to add one or more conv layers
+#         conv_blocks = [
+#             GCNConv(num_features, self.hidden_dim),
+#             ReLU(),
+#         ]
+
+#         # ability to add one or more conv blocks
+#         for _ in range(self.num_layers - 1):
+#             conv_blocks += [
+#                 GCNConv(self.hidden_dim, self.hidden_dim),
+#                 ReLU(),
+#                 GCNConv(self.hidden_dim, self.hidden_dim),
+#                 ReLU(),
+#             ]
+
+#         # group all the conv layers
+#         self.conv_layers = ModuleList(conv_blocks)
+
+#         # add the linear layers for flattening the output from MPNN
+#         self.flatten = Sequential(
+#             Linear(self.hidden_dim, self.hidden_dim),
+#             ReLU(),
+#             Linear(self.hidden_dim, num_classes))
+
+#         self.acc = torchmetrics.Accuracy(task='binary')
+#         self.auroc = torchmetrics.AUROC(task='binary')
+
+#     def forward(self, data):
+#         # process the layers
+#         x, edge_index = data.x, data.edge_index
+#         for idx, layer in enumerate(self.conv_layers):
+#             if isinstance(layer, GCNConv):
+#                 x = layer(x, edge_index)
+#             else:
+#                 x = layer(x)
+#             x = F.dropout(x, p=self.dropout, training=self.training)
+#         # pass the output to the linear output layer
+#         out = self.flatten(x)
+
+#         # return the output
+#         return F.log_softmax(out, dim=1)
+
+#     def training_step(self, batch, batch_idx):
+#         x = self.forward(batch)
+#         loss = torch.nn.functional.cross_entropy(x, batch.y)
+#         # self.log('train_loss', loss)
+#         # acc = self.acc(x.argmax(dim=1), batch.y)
+#         # self.log('train_acc', acc, on_epoch=False, prog_bar=False, on_step=False)
+#         # auc = self.auroc(x.argmax(dim=1), batch.y)
+#         # self.log('train_auc', auc, on_epoch=False)
+#         return loss
+
+#     def validation_step(self, batch, batch_idx):
+#         x = self.forward(batch)
+#         loss = torch.nn.functional.cross_entropy(x, batch.y)
+#         self.log('val_loss', loss)
+#         acc = self.acc(x.argmax(dim=1), batch.y)
+#         self.log('val_acc', acc, on_epoch=True, prog_bar=True, on_step=False)
+#         auc = self.auroc(x.argmax(dim=1), batch.y)
+#         self.log('val_auc', auc, on_epoch=True)
+#         return loss
+
+#     def test_step(self, batch, batch_idx):
+#         x = self.forward(batch)
+#         loss = torch.nn.functional.cross_entropy(x, batch.y)
+#         self.log('test_loss', loss)
+#         acc = self.acc(x.argmax(dim=1), batch.y)
+#         self.log('test_acc', acc, on_epoch=True)
+#         auc = self.auroc(x.argmax(dim=1), batch.y)
+#         self.log('test_auc', auc, on_epoch=True)
+#         return loss
+
+#     def configure_optimizers(self):
+#         return torch.optim.Adam(self.parameters(), lr=self.lr)
 
 
 class PyG_GNN(L.LightningModule):
diff --git a/flowbench/supervised/utils.py b/flowbench/supervised/utils.py
index 4018e24..a3673a7 100644
--- a/flowbench/supervised/utils.py
+++ b/flowbench/supervised/utils.py
@@ -8,6 +8,7 @@
 
 def early_stopping_callback(minitor='val_loss', patience=5, mode='min'):
     r""" Early stopping callback.
+
     Args:
         minitor (str): The metric to monitor.
         patience (int): Number of epochs with no improvement after which training will be stopped.
diff --git a/flowbench/unsupervised/pygod.py b/flowbench/unsupervised/pygod.py
index 1ed74a3..49840c1 100644
--- a/flowbench/unsupervised/pygod.py
+++ b/flowbench/unsupervised/pygod.py
@@ -7,14 +7,14 @@
 of `pyod.detector`.
 
 Citation:
-@article{liu2022bond,
-  title={Bond: Benchmarking unsupervised outlier node detection on static attributed graphs},
-  author={Liu, Kay and Dou, Yingtong and Zhao, Yue and Ding, Xueying and Hu, Xiyang and Zhang, Ruitong and Ding, Kaize and Chen, Canyu and Peng, Hao and Shu, Kai and Sun, Lichao and Li, Jundong and Chen, George H. and Jia, Zhihao and Yu, Philip S.},
-  journal={Advances in Neural Information Processing Systems},
-  volume={35},
-  pages={27021--27035},
-  year={2022}
-}
+    @article{liu2022bond,
+      title={Bond: Benchmarking unsupervised outlier node detection on static attributed graphs},
+      author={Liu, Kay and Dou, Yingtong and Zhao, Yue and Ding, Xueying and Hu, Xiyang and Zhang, Ruitong and Ding, Kaize and Chen, Canyu and Peng, Hao and Shu, Kai and Sun, Lichao and Li, Jundong and Chen, George H. and Jia, Zhihao and Yu, Philip S.},
+      journal={Advances in Neural Information Processing Systems},
+      volume={35},
+      pages={27021--27035},
+      year={2022}
+    }
 
 For more information, please refer to https://docs.pygod.org/en/latest/.
diff --git a/flowbench/unsupervised/pyod.py b/flowbench/unsupervised/pyod.py
index e0f8218..a09dbbe 100644
--- a/flowbench/unsupervised/pyod.py
+++ b/flowbench/unsupervised/pyod.py
@@ -7,17 +7,17 @@
 of `pyod.models`.
 
 Citation:
-@article{PyOD2019,
-  author = {Zhao, Yue},
-  title = {PyOD: A Python Toolbox for Scalable Outlier Detection},
-  year = {2019},
-  publisher = {Journal of Machine Learning Research},
-  journal = {JMLR},
-  volume = {20},
-  number = {96},
-  pages = {1-7},
-  url = {http://jmlr.org/papers/v20/19-011.html},
-}
+    @article{PyOD2019,
+      author = {Zhao, Yue},
+      title = {PyOD: A Python Toolbox for Scalable Outlier Detection},
+      year = {2019},
+      publisher = {Journal of Machine Learning Research},
+      journal = {JMLR},
+      volume = {20},
+      number = {96},
+      pages = {1-7},
+      url = {http://jmlr.org/papers/v20/19-011.html},
+    }
 
 For more information, please refer to https://pyod.readthedocs.io/.