Removing traj_step #199

Open · wants to merge 1 commit into base: main
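In brief: the MPC controllers previously kept their own reference-index counter, `self.traj_step`, and incremented it on every `select_action` call in tracking mode. This PR removes that counter and instead passes the current environment step (recovered from the `info` dict via `extract_step`) into `get_references(step)`, so the reference slice is derived directly from information supplied by the environment. A minimal sketch of the before/after pattern, condensed from the diffs below:

```python
# Before: controller-side bookkeeping of the reference index.
goal_states = self.get_references()        # indexes the trajectory with self.traj_step
if self.mode == 'tracking':
    self.traj_step += 1

# After: the reference index comes from the environment.
step = self.extract_step(info)             # current env step, taken from the info dict
goal_states = self.get_references(step)    # stateless slice of the reference trajectory
```
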
2 changes: 1 addition & 1 deletion safe_control_gym/controllers/lqr/ilqr.py
@@ -317,7 +317,7 @@ def calculate_lqr_action(self, obs, step):

Args:
obs (ndarray): The observation at this timestep.
step (int): The timestep.
step (int): The current step/iteration of the environment.

Returns:
action (ndarray): The calculated action.
19 changes: 13 additions & 6 deletions safe_control_gym/controllers/mpc/gp_mpc.py
@@ -582,12 +582,14 @@ def setup_gp_optimizer(self, n_ind_points):
self.z_ind_val = z_ind_val

def select_action_with_gp(self,
obs
obs,
step,
):
'''Solves nonlinear MPC problem to get next action.

Args:
obs (np.array): Current state/observation.
step (int): The current step/iteration of the environment.

Returns:
action (np.array): Input/action to the task/env.
@@ -603,13 +605,14 @@ def select_action_with_gp(self,
mean_post_factor = opti_dict['mean_post_factor']
z_ind = opti_dict['z_ind']
n_ind_points = opti_dict['n_ind_points']

# Assign the initial state.
opti.set_value(x_init, obs)

# Assign reference trajectory within horizon.
goal_states = self.get_references()
goal_states = self.get_references(step)
opti.set_value(x_ref, goal_states)
if self.mode == 'tracking':
self.traj_step += 1

# Set the probabilistic state and input constraint set limits.
state_constraint_set_prev, input_constraint_set_prev = self.precompute_probabilistic_limits()

@@ -627,6 +630,7 @@

opti.set_value(mean_post_factor, mean_post_factor_val)
opti.set_value(z_ind, z_ind_val)

# Initial guess for the optimization problem.
if self.warmstart and self.x_prev is not None and self.u_prev is not None:
# Shift previous solutions by 1 step
@@ -636,6 +640,7 @@
u_guess[:-1] = u_guess[1:]
opti.set_initial(x_var, x_guess)
opti.set_initial(u_var, u_guess)

# Solve the optimization problem.
try:
sol = opti.solve()
@@ -661,6 +666,7 @@
self.results_dict['linear_pred'].append(lin_pred)
self.results_dict['gp_mean_eq_pred'].append(gp_contribution)
self.results_dict['gp_pred'].append(pred.numpy())

# Take the first one from solved action sequence.
if u_val.ndim > 1:
action = u_val[:, 0]
@@ -976,7 +982,8 @@ def select_action(self,
print('[ERROR]: Not yet supported.')
exit()
t1 = time.perf_counter()
action = self.select_action_with_gp(obs)
step = self.extract_step(info)
action = self.select_action_with_gp(obs, step)
t2 = time.perf_counter()
print(f'GP SELECT ACTION TIME: {(t2 - t1)}')
self.last_obs = obs
@@ -1010,7 +1017,7 @@ def reset(self):
elif self.env.TASK == Task.TRAJ_TRACKING:
self.mode = 'tracking'
self.traj = self.env.X_GOAL.T
self.traj_step = 0

# Dynamics model.
if self.gaussian_process is not None:
if self.sparse_gp and self.train_data['train_targets'].shape[0] <= self.n_ind_points:
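The new call sites rely on `self.extract_step(info)`, which is defined outside this diff. A hedged sketch of what it is assumed to do: read the environment's step counter from the `info` dict (the `'current_step'` key is an assumption here), falling back to 0 when no info is passed.

```python
def extract_step(self, info=None):
    '''Hypothetical helper: recover the current env step from the info dict.'''
    if info is not None:
        return info['current_step']  # assumed key name; not shown in this PR
    return 0
```
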
9 changes: 5 additions & 4 deletions safe_control_gym/controllers/mpc/linear_mpc.py
@@ -16,7 +16,6 @@
from safe_control_gym.controllers.lqr.lqr_utils import discretize_linear_system
from safe_control_gym.controllers.mpc.mpc import MPC
from safe_control_gym.controllers.mpc.mpc_utils import compute_discrete_lqr_gain_from_cont_linear_system
from safe_control_gym.envs.benchmark_env import Task


class LinearMPC(MPC):
@@ -214,14 +213,16 @@ def select_action(self,

# Assign the initial state.
opti.set_value(x_init, obs - self.X_EQ)

# Assign reference trajectory within horizon.
goal_states = self.get_references()
step = self.extract_step(info)
goal_states = self.get_references(step)
opti.set_value(x_ref, goal_states)
if self.env.TASK == Task.TRAJ_TRACKING:
self.traj_step += 1

if self.warmstart and self.u_prev is not None and self.x_prev is not None:
opti.set_initial(x_var, self.x_prev)
opti.set_initial(u_var, self.u_prev)

# Solve the optimization problem.
try:
sol = opti.solve()
33 changes: 18 additions & 15 deletions safe_control_gym/controllers/mpc/mpc.py
@@ -130,8 +130,6 @@ def reset_before_run(self, obs=None, info=None, env=None):
# Initialize previous state and action.
self.x_prev = None
self.u_prev = None
# Step along the reference.
self.traj_step = 0
super().reset_before_run(obs, info, env)

def reset(self):
@@ -274,15 +272,17 @@ def select_action(self,
'''
opti_dict = self.opti_dict
opti = opti_dict['opti']
x_var = opti_dict['x_var'] # optimization variables
u_var = opti_dict['u_var'] # optimization variables
x_init = opti_dict['x_init'] # initial state
x_ref = opti_dict['x_ref'] # reference state/trajectory
x_var = opti_dict['x_var'] # Optimization variables
u_var = opti_dict['u_var'] # Optimization variables
x_init = opti_dict['x_init'] # Initial state
x_ref = opti_dict['x_ref'] # Reference state/trajectory

# Assign the initial state.
opti.set_value(x_init, obs)

# Assign reference trajectory within horizon.
goal_states = self.get_references()
step = self.extract_step(info)
goal_states = self.get_references(step)
opti.set_value(x_ref, goal_states)

if self.warmstart and self.x_prev is not None and self.u_prev is not None:
@@ -294,10 +294,6 @@
opti.set_initial(x_var, x_guess)
opti.set_initial(u_var, u_guess)

if self.mode == 'tracking':
# Increment the trajectory step after update the reference and initial guess
self.traj_step += 1

# Solve the optimization problem.
try:
sol = opti.solve()
@@ -338,15 +334,22 @@
self.prev_action = action
return action

def get_references(self):
'''Constructs reference states along mpc horizon, (nx, T+1).'''
def get_references(self, step):
'''Constructs reference states along mpc horizon, (nx, T+1).

Args:
step (int): The current step/iteration of the environment.

Returns:
goal_states (ndarray): Reference states along MPC horizon, shape (nx, T+1).
'''
if self.env.TASK == Task.STABILIZATION:
# Repeat goal state for horizon steps.
goal_states = np.tile(self.env.X_GOAL.reshape(-1, 1), (1, self.T + 1))
elif self.env.TASK == Task.TRAJ_TRACKING:
# Slice trajectory for horizon steps, if not long enough, repeat last state.
start = min(self.traj_step, self.traj.shape[-1])
end = min(self.traj_step + self.T + 1, self.traj.shape[-1])
start = min(step, self.traj.shape[-1])
end = min(step + self.T + 1, self.traj.shape[-1])
remain = max(0, self.T + 1 - (end - start))
goal_states = np.concatenate([
self.traj[:, start:end],
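With `traj_step` gone, the tracking reference in `get_references` is a pure function of `step`: take `T + 1` states starting at `step` and pad with the final trajectory state once the horizon runs past the end. A standalone sketch of that slicing logic (a `traj` array of shape `(nx, N)` stands in for `self.traj`; the last-state padding is assumed to use `np.tile`, matching the behaviour described in the docstring):

```python
import numpy as np

def slice_references(traj, step, T):
    '''Slice T + 1 reference states starting at `step`, repeating the last state if needed.'''
    start = min(step, traj.shape[-1])
    end = min(step + T + 1, traj.shape[-1])
    remain = max(0, T + 1 - (end - start))
    return np.concatenate([traj[:, start:end],
                           np.tile(traj[:, -1:], (1, remain))], axis=-1)

# Example: 1D reference of length 5 with horizon T = 3.
traj = np.arange(5).reshape(1, -1)     # [[0 1 2 3 4]]
print(slice_references(traj, 3, 3))    # [[3 4 4 4]] -> padded with the last state
```
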
5 changes: 2 additions & 3 deletions safe_control_gym/controllers/mpc/mpc_acados.py
@@ -244,9 +244,8 @@ def select_action(self,
self.acados_ocp_solver.set(0, 'ubx', obs)

# Set reference for the control horizon
goal_states = self.get_references()
if self.mode == 'tracking':
self.traj_step += 1
step = self.extract_step(info)
goal_states = self.get_references(step)

y_ref = np.concatenate((goal_states[:, :-1],
np.repeat(self.U_EQ.reshape(-1, 1), self.T, axis=1)), axis=0)
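For the acados variant, the reference `y_ref` stacks the state references over the horizon on top of the equilibrium input repeated `T` times, so each column is one `(x_ref, u_eq)` pair. A small sketch of the shapes involved (dimensions are placeholders, not values from the repo):

```python
import numpy as np

nx, nu, T = 6, 2, 10                               # placeholder dimensions
goal_states = np.zeros((nx, T + 1))                # from get_references(step), shape (nx, T + 1)
U_EQ = np.zeros(nu)                                # equilibrium input

# First T state references stacked on top of the repeated equilibrium input.
y_ref = np.concatenate((goal_states[:, :-1],
                        np.repeat(U_EQ.reshape(-1, 1), T, axis=1)), axis=0)
print(y_ref.shape)                                 # (nx + nu, T)
```
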
37 changes: 0 additions & 37 deletions safe_control_gym/controllers/mpc/mpc_utils.py
@@ -1,11 +1,9 @@
'''General MPC utility functions.'''

import casadi as cs
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.linalg
from termcolor import colored

from safe_control_gym.controllers.lqr.lqr_utils import discretize_linear_system
from safe_control_gym.envs.constraints import ConstraintList
@@ -151,38 +149,3 @@ def set_acados_constraint_bound(constraint,
bound_value = 1e-6

return bound_value * np.ones(constraint.shape)


def plot_open_loop_sol(ctrl):
'''Plot the open loop prediction of the MPC controller.

Args:
ctrl (MPC): MPC controller object.
'''
if ctrl.x_prev is not None and ctrl.u_prev is not None:
nx = ctrl.x_prev.shape[0] # state dim
nu = ctrl.u_prev.shape[0] # input dim
steps = ctrl.T # prediction horizon
dt = ctrl.dt # ctrl frequency
x = ctrl.x_prev # open loop state (nx, steps + 1)
u = ctrl.u_prev # open loop input (nu, steps)

# get the reference trajectory
goal_states = ctrl.get_references()

# Plot the open loop prediction
fig, axs = plt.subplots(nx + nu, 1, figsize=(5, 8))
fig.tight_layout()
for i in range(nx):
axs[i].plot(np.arange(steps + 1) * dt, x[i, :], 'b', label='pred')
axs[i].plot(np.arange(steps + 1) * dt, goal_states[i, :], 'r--', label='ref', )
axs[i].set_ylabel(f'$x_{i}$')
axs[i].legend()
for i in range(nu):
axs[nx + i].plot(np.arange(steps) * dt, u[i, :], 'b', label='pred')
axs[nx + i].set_ylabel(f'$u_{i}$')

plt.xlabel('Time [s]')
plt.show()
else:
print(colored('[Warning] No open loop solution to plot.', 'yellow'))