From a91fa305029a9be8150f15a645adc2f1db0e6d1d Mon Sep 17 00:00:00 2001
From: Kevin Sun
Date: Fri, 20 Jun 2025 15:47:55 +0800
Subject: [PATCH 1/2] Several fixes:

1. Add an additional fix in TransformGetSetItemToIndex for more than one
   index, and a related test case
2. Fix compile issues in the Adam optimizer in ES
3. Set default devices at the start of all tests if necessary
4. Fix an obsolete description in HPOProblemWrapper
---
 .../algorithms/so/es_variants/adam_step.py   | 21 +++++-----
 src/evox/algorithms/so/pso_variants/utils.py |  2 +-
 src/evox/core/module.py                      | 34 ++++++++++++++---
 src/evox/problems/hpo_wrapper.py             |  2 +-
 unit_test/algorithms/test_moea.py            |  7 +++-
 unit_test/core/test_index_fix.py             | 38 +++++++++++++++++++
 unit_test/operators/test_rvea_selection.py   |  1 +
 unit_test/operators/test_sbx_and_pm.py       |  1 +
 unit_test/problems/test_basic.py             |  1 +
 unit_test/problems/test_cec2022.py           |  1 +
 unit_test/problems/test_dtlz.py              |  1 +
 11 files changed, 88 insertions(+), 21 deletions(-)
 create mode 100644 unit_test/core/test_index_fix.py

diff --git a/src/evox/algorithms/so/es_variants/adam_step.py b/src/evox/algorithms/so/es_variants/adam_step.py
index cd6bb381d..2a3d1bc5b 100644
--- a/src/evox/algorithms/so/es_variants/adam_step.py
+++ b/src/evox/algorithms/so/es_variants/adam_step.py
@@ -6,21 +6,20 @@ def adam_single_tensor(
     grad: torch.Tensor,
     exp_avg: torch.Tensor,
     exp_avg_sq: torch.Tensor,
-    beta1: torch.Tensor = torch.tensor(0.9),
-    beta2: torch.Tensor = torch.tensor(0.999),
-    lr: torch.Tensor = torch.tensor(1e-3),
-    weight_decay: torch.Tensor = torch.tensor(0),
-    eps: torch.Tensor = torch.tensor(1e-8),
+    beta1: float | torch.Tensor = 0.9,
+    beta2: float | torch.Tensor = 0.999,
+    lr: float | torch.Tensor = 1e-3,
+    weight_decay: float | torch.Tensor = 0,
+    eps: float | torch.Tensor = 1e-8,
+    decouple_weight_decay: bool = False,
 ):
     # weight decay
-    if weight_decay != 0:
-        weight_decay = weight_decay.to(param.device)
+    # if weight_decay != 0:
+    if decouple_weight_decay:
+        param = param * (1 - weight_decay * lr)
+    else:
         grad = grad + weight_decay * param
     # Decay the first and second moment running average coefficient
-    beta1 = beta1.to(param.device)
-    beta2 = beta2.to(param.device)
-    lr = lr.to(param.device)
-    eps = eps.to(param.device)
     exp_avg = torch.lerp(exp_avg, grad, 1 - beta1)
     exp_avg_sq = exp_avg_sq * beta2 + grad * grad.conj() * (1 - beta2)
     denom = exp_avg_sq.sqrt() + eps
diff --git a/src/evox/algorithms/so/pso_variants/utils.py b/src/evox/algorithms/so/pso_variants/utils.py
index e74059c66..1d69a8d93 100644
--- a/src/evox/algorithms/so/pso_variants/utils.py
+++ b/src/evox/algorithms/so/pso_variants/utils.py
@@ -18,7 +18,7 @@ def min_by(
     values = torch.cat(values, dim=0)
     keys = torch.cat(keys, dim=0)
     min_index = torch.argmin(keys)
-    return values[min_index[None]][0], keys[min_index[None]][0]
+    return values[min_index], keys[min_index]


 def random_select_from_mask(mask: torch.Tensor, count: int, dim: int = -1) -> torch.Tensor:
diff --git a/src/evox/core/module.py b/src/evox/core/module.py
index 362a89731..8443f0cfa 100644
--- a/src/evox/core/module.py
+++ b/src/evox/core/module.py
@@ -10,7 +10,7 @@

 from functools import wraps
-from typing import Callable, Dict, Optional, TypeVar, Union
+from typing import Any, Callable, Dict, Optional, Sequence, TypeVar, Union

 import torch
 import torch.nn as nn

@@ -84,6 +84,24 @@ def eval(self):
         assert False, "`ModuleBase.eval()` shall never be invoked to prevent ambiguity."


+def _transform_scalar_index(ori_index: Sequence[Any | torch.Tensor] | Any | torch.Tensor):
+    if isinstance(ori_index, Sequence):
+        index = tuple(ori_index)
+    else:
+        index = (ori_index,)
+    any_scalar_tensor = False
+    new_index = []
+    for idx in index:
+        if isinstance(idx, torch.Tensor) and idx.ndim == 0:
+            new_index.append(idx[None])
+            any_scalar_tensor = True
+        else:
+            new_index.append(idx)
+    if not isinstance(ori_index, Sequence):
+        new_index = new_index[0]
+    return new_index, any_scalar_tensor
+
+
 # We still need a fix for the vmap
 # related issue: https://github.com/pytorch/pytorch/issues/124423
 class TransformGetSetItemToIndex(TorchFunctionMode):
@@ -95,15 +113,19 @@ class TransformGetSetItemToIndex(TorchFunctionMode):
     # That is, we convert A[idx] to A[idx[None]][0], A[idx] += 1 to A[idx[None]] += 1.
     # This is a temporary solution until the issue is fixed in PyTorch.
     def __torch_function__(self, func, types, args, kwargs=None):
+        # A[idx]
         if func == torch.Tensor.__getitem__:
             x, index = args
-            if isinstance(index, torch.Tensor) and index.ndim == 0:
-                return func(x, index[None], **(kwargs or {}))[0]
-            # return torch.index_select(x, 0, index)
+            new_index, any_scalar = _transform_scalar_index(index)
+            x = func(x, new_index, **(kwargs or {}))
+            if any_scalar:
+                x = x.squeeze(0)
+            return x
+        # A[idx] = value
         elif func == torch.Tensor.__setitem__:
             x, index, value = args
-            if isinstance(index, torch.Tensor) and index.ndim == 0:
-                return func(x, index[None], value, **(kwargs or {}))
+            new_index, _ = _transform_scalar_index(index)
+            return func(x, new_index, value, **(kwargs or {}))
         return func(*args, **(kwargs or {}))


diff --git a/src/evox/problems/hpo_wrapper.py b/src/evox/problems/hpo_wrapper.py
index 861261d2c..c32798398 100644
--- a/src/evox/problems/hpo_wrapper.py
+++ b/src/evox/problems/hpo_wrapper.py
@@ -187,7 +187,7 @@ def __init__(
         :param iterations: The number of iterations to be executed in the optimization process.
         :param num_instances: The number of instances to be executed in parallel in the optimization process, i.e., the population size of the outer algorithm.
-        :param workflow: The workflow to be used in the optimization process. Must be wrapped by `core.jit_class`.
+        :param workflow: The workflow to be used in the optimization process.
         :param num_repeats: The number of times to repeat the evaluation process for each instance. Defaults to 1.
         :param copy_init_state: Whether to copy the initial state of the workflow for each evaluation. Defaults to `True`. If your workflow contains operations that IN-PLACE modify the tensor(s) in initial state, this should be set to `True`. Otherwise, you can set it to `False` to save memory.

""" diff --git a/unit_test/algorithms/test_moea.py b/unit_test/algorithms/test_moea.py index a30910234..fcb4296e5 100644 --- a/unit_test/algorithms/test_moea.py +++ b/unit_test/algorithms/test_moea.py @@ -1,4 +1,4 @@ -from unittest import TestCase, skip +from unittest import TestCase, skipIf import torch @@ -74,7 +74,10 @@ def test_moead(self): self.run_algorithm(algo) self.run_compiled_algorithm(algo) - @skip("Torch 2.7 bug when running on non-AVX512 CPU: https://github.com/pytorch/pytorch/issues/152172") + @skipIf( + torch.__version__.startswith("2.7."), + "Torch 2.7 bug when running on non-AVX512 CPU: https://github.com/pytorch/pytorch/issues/152172", + ) def test_hype(self): algo = HypE(pop_size=self.pop_size, n_objs=3, lb=self.lb, ub=self.ub) self.run_algorithm(algo) diff --git a/unit_test/core/test_index_fix.py b/unit_test/core/test_index_fix.py new file mode 100644 index 000000000..b928a91a7 --- /dev/null +++ b/unit_test/core/test_index_fix.py @@ -0,0 +1,38 @@ +import unittest + +import torch + +from evox.core import compile, vmap + + +class TestIndexFix(unittest.TestCase): + def setUp(self): + torch.manual_seed(42) + torch.set_default_device("cuda" if torch.cuda.is_available() else "cpu") + + def test_get(self): + def _get_vmap(x: torch.Tensor, index: torch.Tensor, range: torch.Tensor): + return x[range, index, index] + + mapped = compile(vmap(_get_vmap, in_dims=(0, 0, None))) + x = torch.rand(3, 2, 5, 5) + indices = torch.randint(5, (3,)) + print(x) + print(indices) + x = mapped(x, indices, torch.arange(2)) + print(x) + + def test_set(self): + def _set_vmap(x: torch.Tensor, index: torch.Tensor, range: torch.Tensor, value: torch.Tensor): + x[range, index, index] = value + return x + + mapped = compile(vmap(_set_vmap, in_dims=(0, 0, None, 0)), fullgraph=True) + x = torch.rand(3, 2, 5, 5) + indices = torch.randint(5, (3,)) + values = torch.rand(3, 2) + print(x) + print(indices) + print(values) + x = mapped(x, indices, torch.arange(2), values) + print(x) diff --git a/unit_test/operators/test_rvea_selection.py b/unit_test/operators/test_rvea_selection.py index e50e05048..f537eb7fa 100644 --- a/unit_test/operators/test_rvea_selection.py +++ b/unit_test/operators/test_rvea_selection.py @@ -7,6 +7,7 @@ class TestRefVecGuided(unittest.TestCase): def setUp(self): + torch.set_default_device("cuda" if torch.cuda.is_available() else "cpu") self.n, self.m, self.nv = 12, 4, 5 self.x = torch.randn(self.n, 10) self.f = torch.randn(self.n, self.m) diff --git a/unit_test/operators/test_sbx_and_pm.py b/unit_test/operators/test_sbx_and_pm.py index 1e40c395f..0129f032f 100644 --- a/unit_test/operators/test_sbx_and_pm.py +++ b/unit_test/operators/test_sbx_and_pm.py @@ -8,6 +8,7 @@ class TestOperators(TestCase): def setUp(self): + torch.set_default_device("cuda" if torch.cuda.is_available() else "cpu") self.n_individuals = 9 self.n_genes = 10 self.x = torch.randn(self.n_individuals, self.n_genes) diff --git a/unit_test/problems/test_basic.py b/unit_test/problems/test_basic.py index 00c80f4d8..dd72edcf3 100644 --- a/unit_test/problems/test_basic.py +++ b/unit_test/problems/test_basic.py @@ -7,6 +7,7 @@ class TestBasic(unittest.TestCase): def setUp(self, dimensions: list = [10], pop_size: int = 7): + torch.set_default_device("cuda" if torch.cuda.is_available() else "cpu") self.dimensions = dimensions self.pop_size = pop_size self.problems = [ diff --git a/unit_test/problems/test_cec2022.py b/unit_test/problems/test_cec2022.py index aca9335c8..4949254d1 100644 --- a/unit_test/problems/test_cec2022.py +++ 
b/unit_test/problems/test_cec2022.py @@ -12,6 +12,7 @@ def setUp(self): self.dimensionality = [2, 10, 20] self.pop_size = 100 torch.manual_seed(42) + torch.set_default_device("cuda" if torch.cuda.is_available() else "cpu") def test_evaluate(self): for i in range(1, 13): diff --git a/unit_test/problems/test_dtlz.py b/unit_test/problems/test_dtlz.py index c5c5f3251..0c0738c1b 100644 --- a/unit_test/problems/test_dtlz.py +++ b/unit_test/problems/test_dtlz.py @@ -7,6 +7,7 @@ class TestDTLZ(TestCase): def setUp(self): + torch.set_default_device("cuda" if torch.cuda.is_available() else "cpu") d = 12 m = 3 self.pro = [ From 66bfd459ba10274b47b0ce873cd9808367399d6f Mon Sep 17 00:00:00 2001 From: Kevin Sun Date: Fri, 20 Jun 2025 15:51:12 +0800 Subject: [PATCH 2/2] Ruff --- src/evox/core/module.py | 4 +++- unit_test/problems/test_basic.py | 4 +++- unit_test/problems/test_hpo_wrapper.py | 16 ++++++++++++---- unit_test/problems/test_supervised_learning.py | 4 +++- unit_test/workflows/test_std_workflow.py | 4 +++- 5 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/evox/core/module.py b/src/evox/core/module.py index 8443f0cfa..b543a9504 100644 --- a/src/evox/core/module.py +++ b/src/evox/core/module.py @@ -45,7 +45,9 @@ def Parameter( ) -def Mutable(value: torch.Tensor, dtype: Optional[torch.dtype] = None, device: Optional[torch.device] = None) -> torch.Tensor: +def Mutable( + value: torch.Tensor, dtype: Optional[torch.dtype] = None, device: Optional[torch.device] = None +) -> torch.Tensor: """Wraps a value as a mutable tensor. This is often used to label a value in an algorithm as a mutable tensor that may changes during iteration(s). diff --git a/unit_test/problems/test_basic.py b/unit_test/problems/test_basic.py index dd72edcf3..85e7eaeac 100644 --- a/unit_test/problems/test_basic.py +++ b/unit_test/problems/test_basic.py @@ -25,4 +25,6 @@ def test_evaluate(self): problem = problem(shift=torch.rand(dimension), affine=torch.rand(dimension, dimension)) population = torch.randn(self.pop_size, dimension) fitness = problem.evaluate(population) - print(f"The fitness of {problem.__class__.__name__} function with {dimension} dimension is {fitness}") + print( + f"The fitness of {problem.__class__.__name__} function with {dimension} dimension is {fitness}" + ) diff --git a/unit_test/problems/test_hpo_wrapper.py b/unit_test/problems/test_hpo_wrapper.py index debd09b75..6f309e71c 100644 --- a/unit_test/problems/test_hpo_wrapper.py +++ b/unit_test/problems/test_hpo_wrapper.py @@ -20,7 +20,9 @@ def evaluate(self, x: torch.Tensor): class BasicAlgorithm(Algorithm): def __init__(self, pop_size: int, lb: torch.Tensor, ub: torch.Tensor, device: torch.device | None = None): super().__init__() - assert lb.ndim == 1 and ub.ndim == 1, f"Lower and upper bounds shall have ndim of 1, got {lb.ndim} and {ub.ndim}" + assert lb.ndim == 1 and ub.ndim == 1, ( + f"Lower and upper bounds shall have ndim of 1, got {lb.ndim} and {ub.ndim}" + ) assert lb.shape == ub.shape, f"Lower and upper bounds shall have same shape, got {lb.ndim} and {ub.ndim}" device = torch.get_default_device() if device is None else device self.pop_size = pop_size @@ -46,19 +48,25 @@ def setUp(self): self.prob = BasicProblem() self.monitor = HPOFitnessMonitor() self.workflow = StdWorkflow(self.algo, self.prob, monitor=self.monitor) - self.hpo_prob = HPOProblemWrapper(iterations=9, num_instances=7, workflow=self.workflow, copy_init_state=True) + self.hpo_prob = HPOProblemWrapper( + iterations=9, num_instances=7, workflow=self.workflow, 
+        )

         self.algo_mo = BasicAlgorithm(10, -10 * torch.ones(2), 10 * torch.ones(2))
         self.prob_mo = DTLZ1(2, 2)
         self.monitor_mo = HPOFitnessMonitor(multi_obj_metric=lambda f: igd(f, self.prob_mo.pf()))
         self.workflow_mo = StdWorkflow(self.algo_mo, self.prob_mo, monitor=self.monitor_mo)
-        self.hpo_prob_mo = HPOProblemWrapper(iterations=9, num_instances=7, workflow=self.workflow_mo, copy_init_state=True)
+        self.hpo_prob_mo = HPOProblemWrapper(
+            iterations=9, num_instances=7, workflow=self.workflow_mo, copy_init_state=True
+        )

         self.algo_mo2 = BasicAlgorithm(10, -10 * torch.ones(2), 10 * torch.ones(2))
         self.prob_mo2 = DTLZ1(2, 2)
         self.monitor_mo2 = HPOFitnessMonitor(multi_obj_metric=lambda f: igd(f, self.prob_mo2.pf()))
         self.workflow_mo2 = StdWorkflow(self.algo_mo2, self.prob_mo2, monitor=self.monitor_mo2)
-        self.hpo_prob_mo2 = HPOProblemWrapper(iterations=9, num_instances=7, workflow=self.workflow_mo2, copy_init_state=True)
+        self.hpo_prob_mo2 = HPOProblemWrapper(
+            iterations=9, num_instances=7, workflow=self.workflow_mo2, copy_init_state=True
+        )

     def test_get_init_params(self):
         params = self.hpo_prob.get_init_params()
diff --git a/unit_test/problems/test_supervised_learning.py b/unit_test/problems/test_supervised_learning.py
index b7174848f..094d4494d 100644
--- a/unit_test/problems/test_supervised_learning.py
+++ b/unit_test/problems/test_supervised_learning.py
@@ -89,7 +89,9 @@ def setUp(self):
                 for inputs, labels in self.train_loader
             ]
         )
-        self.pre_test_loader = tuple([(inputs.to(self.device), labels.to(self.device)) for inputs, labels in self.test_loader])
+        self.pre_test_loader = tuple(
+            [(inputs.to(self.device), labels.to(self.device)) for inputs, labels in self.test_loader]
+        )

         self.model = SampleCNN().to(self.device)
         self.adapter = ParamsAndVector(dummy_model=self.model)
diff --git a/unit_test/workflows/test_std_workflow.py b/unit_test/workflows/test_std_workflow.py
index 3831ba4ce..d5c4a35b0 100644
--- a/unit_test/workflows/test_std_workflow.py
+++ b/unit_test/workflows/test_std_workflow.py
@@ -24,7 +24,9 @@ def evaluate(self, pop: torch.Tensor):
 class BasicAlgorithm(Algorithm):
     def __init__(self, pop_size: int, lb: torch.Tensor, ub: torch.Tensor):
         super().__init__()
-        assert lb.ndim == 1 and ub.ndim == 1, f"Lower and upper bounds shall have ndim of 1, got {lb.ndim} and {ub.ndim}"
+        assert lb.ndim == 1 and ub.ndim == 1, (
+            f"Lower and upper bounds shall have ndim of 1, got {lb.ndim} and {ub.ndim}"
+        )
         assert lb.shape == ub.shape, f"Lower and upper bounds shall have same shape, got {lb.ndim} and {ub.ndim}"
         self.pop_size = pop_size
         self.lb = lb
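
A minimal standalone sketch of the scalar-index behavior the first patch fixes, mirroring the new unit_test/core/test_index_fix.py. The function name gather_diag is illustrative (not from the patch), and the snippet assumes the evox.core.compile/vmap API used in that test:

import torch

from evox.core import compile, vmap


def gather_diag(x: torch.Tensor, index: torch.Tensor, rows: torch.Tensor) -> torch.Tensor:
    # Under vmap, each per-sample `index` is a 0-dim tensor; this indexing
    # pattern trips pytorch/pytorch#124423 without the fix. The
    # TransformGetSetItemToIndex mode rewrites the lookup to use index[None]
    # and squeezes the result back to the original shape.
    return x[rows, index, index]


mapped = compile(vmap(gather_diag, in_dims=(0, 0, None)))
x = torch.rand(3, 2, 5, 5)
indices = torch.randint(5, (3,))
out = mapped(x, indices, torch.arange(2))
print(out.shape)  # expected: torch.Size([3, 2])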