Removing traj_step #199

Open · wants to merge 1 commit into base: main
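In brief: the MPC controllers previously kept their own reference-index counter, `self.traj_step`, and incremented it on every `select_action` call in tracking mode. This PR removes that counter and instead passes the current environment step (recovered from the `info` dict via `extract_step`) into `get_references(step)`, so the reference slice is derived directly from information supplied by the environment. A minimal sketch of the before/after pattern, condensed from the diffs below:

```python
# Before: controller-side bookkeeping of the reference index.
goal_states = self.get_references()        # indexes the trajectory with self.traj_step
if self.mode == 'tracking':
    self.traj_step += 1

# After: the reference index comes from the environment.
step = self.extract_step(info)             # current env step, taken from the info dict
goal_states = self.get_references(step)    # stateless slice of the reference trajectory
```
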
2 changes: 1 addition & 1 deletion safe_control_gym/controllers/lqr/ilqr.py
@@ -317,7 +317,7 @@ def calculate_lqr_action(self, obs, step):

Args:
obs (ndarray): The observation at this timestep.
step (int): The timestep.
step (int): The current step/iteration of the environment.

Returns:
action (ndarray): The calculated action.
19 changes: 13 additions & 6 deletions safe_control_gym/controllers/mpc/gp_mpc.py
@@ -582,12 +582,14 @@ def setup_gp_optimizer(self, n_ind_points):
self.z_ind_val = z_ind_val

def select_action_with_gp(self,
obs
obs,
step,
):
'''Solves nonlinear MPC problem to get next action.

Args:
obs (np.array): Current state/observation.
step (int): The current step/iteration of the environment.

Returns:
action (np.array): Input/action to the task/env.
@@ -603,13 +605,14 @@ def select_action_with_gp(self,
mean_post_factor = opti_dict['mean_post_factor']
z_ind = opti_dict['z_ind']
n_ind_points = opti_dict['n_ind_points']

# Assign the initial state.
opti.set_value(x_init, obs)

# Assign reference trajectory within horizon.
goal_states = self.get_references()
goal_states = self.get_references(step)
opti.set_value(x_ref, goal_states)
if self.mode == 'tracking':
self.traj_step += 1

# Set the probabilistic state and input constraint set limits.
state_constraint_set_prev, input_constraint_set_prev = self.precompute_probabilistic_limits()

@@ -627,6 +630,7 @@

opti.set_value(mean_post_factor, mean_post_factor_val)
opti.set_value(z_ind, z_ind_val)

# Initial guess for the optimization problem.
if self.warmstart and self.x_prev is not None and self.u_prev is not None:
# Shift previous solutions by 1 step
@@ -636,6 +640,7 @@
u_guess[:-1] = u_guess[1:]
opti.set_initial(x_var, x_guess)
opti.set_initial(u_var, u_guess)

# Solve the optimization problem.
try:
sol = opti.solve()
@@ -661,6 +666,7 @@
self.results_dict['linear_pred'].append(lin_pred)
self.results_dict['gp_mean_eq_pred'].append(gp_contribution)
self.results_dict['gp_pred'].append(pred.numpy())

# Take the first one from solved action sequence.
if u_val.ndim > 1:
action = u_val[:, 0]
@@ -976,7 +982,8 @@ def select_action(self,
print('[ERROR]: Not yet supported.')
exit()
t1 = time.perf_counter()
action = self.select_action_with_gp(obs)
step = self.extract_step(info)
action = self.select_action_with_gp(obs, step)
t2 = time.perf_counter()
print(f'GP SELECT ACTION TIME: {(t2 - t1)}')
self.last_obs = obs
@@ -1010,7 +1017,7 @@ def reset(self):
elif self.env.TASK == Task.TRAJ_TRACKING:
self.mode = 'tracking'
self.traj = self.env.X_GOAL.T
self.traj_step = 0

# Dynamics model.
if self.gaussian_process is not None:
if self.sparse_gp and self.train_data['train_targets'].shape[0] <= self.n_ind_points:
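The new call sites rely on `self.extract_step(info)`, which is defined outside this diff. A hedged sketch of what it is assumed to do: read the environment's step counter from the `info` dict (the `'current_step'` key is an assumption here), falling back to 0 when no info is passed.

```python
def extract_step(self, info=None):
    '''Hypothetical helper: recover the current env step from the info dict.'''
    if info is not None:
        return info['current_step']  # assumed key name; not shown in this PR
    return 0
```
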
9 changes: 5 additions & 4 deletions safe_control_gym/controllers/mpc/linear_mpc.py
@@ -16,7 +16,6 @@
from safe_control_gym.controllers.lqr.lqr_utils import discretize_linear_system
from safe_control_gym.controllers.mpc.mpc import MPC
from safe_control_gym.controllers.mpc.mpc_utils import compute_discrete_lqr_gain_from_cont_linear_system
from safe_control_gym.envs.benchmark_env import Task


class LinearMPC(MPC):
@@ -214,14 +213,16 @@ def select_action(self,

# Assign the initial state.
opti.set_value(x_init, obs - self.X_EQ)

# Assign reference trajectory within horizon.
goal_states = self.get_references()
step = self.extract_step(info)
goal_states = self.get_references(step)
opti.set_value(x_ref, goal_states)
if self.env.TASK == Task.TRAJ_TRACKING:
self.traj_step += 1

if self.warmstart and self.u_prev is not None and self.x_prev is not None:
opti.set_initial(x_var, self.x_prev)
opti.set_initial(u_var, self.u_prev)

# Solve the optimization problem.
try:
sol = opti.solve()
33 changes: 18 additions & 15 deletions safe_control_gym/controllers/mpc/mpc.py
@@ -130,8 +130,6 @@ def reset_before_run(self, obs=None, info=None, env=None):
# Initialize previous state and action.
self.x_prev = None
self.u_prev = None
# Step along the reference.
self.traj_step = 0
super().reset_before_run(obs, info, env)

def reset(self):
@@ -274,15 +272,17 @@ def select_action(self,
'''
opti_dict = self.opti_dict
opti = opti_dict['opti']
x_var = opti_dict['x_var'] # optimization variables
u_var = opti_dict['u_var'] # optimization variables
x_init = opti_dict['x_init'] # initial state
x_ref = opti_dict['x_ref'] # reference state/trajectory
x_var = opti_dict['x_var'] # Optimization variables
u_var = opti_dict['u_var'] # Optimization variables
x_init = opti_dict['x_init'] # Initial state
x_ref = opti_dict['x_ref'] # Reference state/trajectory

# Assign the initial state.
opti.set_value(x_init, obs)

# Assign reference trajectory within horizon.
goal_states = self.get_references()
step = self.extract_step(info)
goal_states = self.get_references(step)
opti.set_value(x_ref, goal_states)

if self.warmstart and self.x_prev is not None and self.u_prev is not None:
@@ -294,10 +294,6 @@
opti.set_initial(x_var, x_guess)
opti.set_initial(u_var, u_guess)

if self.mode == 'tracking':
# Increment the trajectory step after update the reference and initial guess
self.traj_step += 1

# Solve the optimization problem.
try:
sol = opti.solve()
@@ -338,15 +334,22 @@
self.prev_action = action
return action

def get_references(self):
'''Constructs reference states along mpc horizon, (nx, T+1).'''
def get_references(self, step):
'''Constructs reference states along mpc horizon, (nx, T+1).

Args:
step (int): The current step/iteration of the environment.

Returns:
goal_states (ndarray): Reference states along MPC horizon, shape (nx, T+1).
'''
if self.env.TASK == Task.STABILIZATION:
# Repeat goal state for horizon steps.
goal_states = np.tile(self.env.X_GOAL.reshape(-1, 1), (1, self.T + 1))
elif self.env.TASK == Task.TRAJ_TRACKING:
# Slice trajectory for horizon steps, if not long enough, repeat last state.
start = min(self.traj_step, self.traj.shape[-1])
end = min(self.traj_step + self.T + 1, self.traj.shape[-1])
start = min(step, self.traj.shape[-1])
end = min(step + self.T + 1, self.traj.shape[-1])
remain = max(0, self.T + 1 - (end - start))
goal_states = np.concatenate([
self.traj[:, start:end],
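With `traj_step` gone, the tracking reference in `get_references` is a pure function of `step`: take `T + 1` states starting at `step` and pad with the final trajectory state once the horizon runs past the end. A standalone sketch of that slicing logic (a `traj` array of shape `(nx, N)` stands in for `self.traj`; the last-state padding is assumed to use `np.tile`, matching the behaviour described in the docstring):

```python
import numpy as np

def slice_references(traj, step, T):
    '''Slice T + 1 reference states starting at `step`, repeating the last state if needed.'''
    start = min(step, traj.shape[-1])
    end = min(step + T + 1, traj.shape[-1])
    remain = max(0, T + 1 - (end - start))
    return np.concatenate([traj[:, start:end],
                           np.tile(traj[:, -1:], (1, remain))], axis=-1)

# Example: 1D reference of length 5 with horizon T = 3.
traj = np.arange(5).reshape(1, -1)     # [[0 1 2 3 4]]
print(slice_references(traj, 3, 3))    # [[3 4 4 4]] -> padded with the last state
```
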
5 changes: 2 additions & 3 deletions safe_control_gym/controllers/mpc/mpc_acados.py
@@ -244,9 +244,8 @@ def select_action(self,
self.acados_ocp_solver.set(0, 'ubx', obs)

# Set reference for the control horizon
goal_states = self.get_references()
if self.mode == 'tracking':
self.traj_step += 1
step = self.extract_step(info)
goal_states = self.get_references(step)

y_ref = np.concatenate((goal_states[:, :-1],
np.repeat(self.U_EQ.reshape(-1, 1), self.T, axis=1)), axis=0)
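For the acados variant, the reference `y_ref` stacks the state references over the horizon on top of the equilibrium input repeated `T` times, so each column is one `(x_ref, u_eq)` pair. A small sketch of the shapes involved (dimensions are placeholders, not values from the repo):

```python
import numpy as np

nx, nu, T = 6, 2, 10                               # placeholder dimensions
goal_states = np.zeros((nx, T + 1))                # from get_references(step), shape (nx, T + 1)
U_EQ = np.zeros(nu)                                # equilibrium input

# First T state references stacked on top of the repeated equilibrium input.
y_ref = np.concatenate((goal_states[:, :-1],
                        np.repeat(U_EQ.reshape(-1, 1), T, axis=1)), axis=0)
print(y_ref.shape)                                 # (nx + nu, T)
```
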
37 changes: 0 additions & 37 deletions safe_control_gym/controllers/mpc/mpc_utils.py
@@ -1,11 +1,9 @@
'''General MPC utility functions.'''

import casadi as cs
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.linalg
from termcolor import colored

from safe_control_gym.controllers.lqr.lqr_utils import discretize_linear_system
from safe_control_gym.envs.constraints import ConstraintList
@@ -151,38 +149,3 @@ def set_acados_constraint_bound(constraint,
bound_value = 1e-6

return bound_value * np.ones(constraint.shape)


def plot_open_loop_sol(ctrl):
'''Plot the open loop prediction of the MPC controller.

Args:
ctrl (MPC): MPC controller object.
'''
if ctrl.x_prev is not None and ctrl.u_prev is not None:
nx = ctrl.x_prev.shape[0] # state dim
nu = ctrl.u_prev.shape[0] # input dim
steps = ctrl.T # prediction horizon
dt = ctrl.dt # ctrl frequency
x = ctrl.x_prev # open loop state (nx, steps + 1)
u = ctrl.u_prev # open loop input (nu, steps)

# get the reference trajectory
goal_states = ctrl.get_references()

# Plot the open loop prediction
fig, axs = plt.subplots(nx + nu, 1, figsize=(5, 8))
fig.tight_layout()
for i in range(nx):
axs[i].plot(np.arange(steps + 1) * dt, x[i, :], 'b', label='pred')
axs[i].plot(np.arange(steps + 1) * dt, goal_states[i, :], 'r--', label='ref', )
axs[i].set_ylabel(f'$x_{i}$')
axs[i].legend()
for i in range(nu):
axs[nx + i].plot(np.arange(steps) * dt, u[i, :], 'b', label='pred')
axs[nx + i].set_ylabel(f'$u_{i}$')

plt.xlabel('Time [s]')
plt.show()
else:
print(colored('[Warning] No open loop solution to plot.', 'yellow'))