-
Notifications
You must be signed in to change notification settings - Fork 87
Closed
Description
Giving the ClassAnFuelprice, I need help to resolve the shape mismatch for the Bounded Countineous and the Composit(agent composite) in the Marl _make_spec Method.
class AnFuelpriceEnv(EnvBase):
def init(self, num_envs, device, seed, categorical_actions, num_agents, episode_length=1000, **kwargs):
super().init(device=device, batch_size=torch.Size([num_envs]))
if seed is None:
seed = int(torch.empty((), dtype=torch.int64).random_().item())
self.set_seed(seed)
self.num_envs = num_envs # Store num_envs as an attribute
self.categorical_actions = categorical_actions
self.num_agents = num_agents
self.episode_length = episode_length
# Assuming these are defined elsewhere or will be defined in _make_spec
self.obs_dim = 13 # Example observation dimension per individual
self.date_dim = 1 # Example date dimension per individual
self.num_individuals_per_agent = 13 # Example number of individuals per agent
self.num_individual_actions = 3 # Example number of actions per individual
# Assuming agent names are 'agent_0', 'agent_1', etc.
self.agents = [f"agent_{i}" for i in range(self.num_agents)]
self._make_spec()
# self._reset() # Call reset after making specs
@staticmethod
def gen_params(batch_size: torch.Size = torch.Size([]), device: Union[str, torch.device] = "cpu") -> TensorDictBase:
# Ensure batch_size is a tuple of integers
if not isinstance(batch_size, torch.Size):
# Attempt to convert to torch.Size
try:
batch_size = torch.Size(batch_size)
except TypeError:
raise TypeError(f"Expected batch_size to be torch.Size or convertible to torch.Size, but got {type(batch_size)}")
data_path = '/content/drive/MyDrive/deep learning codes/EIAAPI_DOWNLOAD/solutions/mergedata/DataDic.pt'
data_columns = ['Forex', 'WTI', 'Brent', 'OPEC', 'Fuelprice5', 'Fuelprice6', 'Fuelprice7', 'Fuelprice8',
'Fuelprice9', 'Fuelprice10', 'Fuelprice11', 'Fuelprice12', 'Fuelprice13',
'reward0', 'reward1', 'reward2', 'reward3', 'reward4', 'reward5', 'reward6', 'reward7', 'reward8',
'reward9', 'reward10', 'reward11', 'reward12',
'action0', 'action1', 'action2', 'action3', 'action4', 'action5', 'action6', 'action7', 'action8',
'action9', 'action10', 'action11', 'action12', 'Date']
# Create num_envs instances of DDataenv if batch_size is provided, otherwise just one
num_instances = batch_size[0] if batch_size else 1
env_data = []
for _ in range(num_instances):
envv = DDataenv(data_path, data_columns)
env_data.append(envv.get_observation())
# Stack the observations to create batched tensors
batched_ac = {}
for key in env_data[0].keys():
batched_ac[key] = torch.stack([torch.tensor(data[key], device="cpu") for data in env_data])
# Explicitly ensure date parameters have shape (batch_size, 1,) if they are scalar
for k in ['Date', 'Date_min', 'Date_max']:
if k in batched_ac and batched_ac[k].ndim == 1: # Check if it's (batch_size,) and key exists
batched_ac[k] = batched_ac[k].unsqueeze(1) # Reshape to (batch_size, 1)
# Create a TensorDict from the batched data
td = TensorDict({"params": {k: v.to(device) for k, v in batched_ac.items()}}, batch_size=batch_size, device=device)
#print(f"gen_params: Created TensorDict with batch_size {batch_size}, shape: {td.shape})
return td
def _make_spec(self):
dummy_td_params = self.gen_params(torch.Size([1]), device=self.device)
# Store min/max data from the dummy TensorDict
self.obs_min_data = dummy_td_params['params','obsState&Fuel_min'].squeeze(0).clone().detach()
self.obs_max_data = dummy_td_params['params','obsState&Fuel_max'].squeeze(0).clone().detach()
self.Date_min_data = dummy_td_params['params','Date_min'].squeeze(0).clone().detach()
self.Date_max_data = dummy_td_params['params','Date_max'].squeeze(0).clone().detach()
self.reward_min_data = dummy_td_params['params','rewardState&reward_min'].squeeze(0).clone().detach()
self.reward_max_data = dummy_td_params['params','rewardState&reward_max'].squeeze(0).clone().detach()
self.action_min_data = dummy_td_params['params','actionState&action_min'].squeeze(0).clone().detach()
self.action_max_data = dummy_td_params['params','actionState&action_max'].squeeze(0).clone().detach()
# Define observation bounds (excluding Date)
obs_low_per_agent = self.obs_min_data # Shape [13]
obs_high_per_agent = self.obs_max_data # Shape [13]
obs_shape_per_agent = obs_low_per_agent.shape # Should be [13]
# Define position_key spec based on Date data (Unbounded)
position_key_shape_per_agent = torch.Size([1]) # Should be [1] per agent
# Define the inner Composite for agents within observation spec
agents_observation_composite_dict = {
# Observation spec for fuel and forex data (shape per agent)
"observation": Bounded( # Using Bounded for the main observation features
low= obs_low_per_agent, # Shape [13]
high= obs_high_per_agent, # Shape [13]
shape=obs_shape_per_agent, # Shape is [13]
dtype=torch.float32,
device=self.device,
),
"position_key": Unbounded( # Positional key spec based on Date (now Unbounded, shape per agent)
shape=position_key_shape_per_agent, # Shape is [1]
dtype=torch.float32, # Assuming Date is float
device=self.device,
),
# Keep done, terminated, truncated under agents with shape [1] per agent
"done": Binary(
shape=[1], # Shape is [1] per agent
dtype=torch.bool,
device=self.device,
),
"is_terminated": Binary(
shape=[1], # Shape is [1] per agent
dtype=torch.bool,
device=self.device,
),
"is_truncated": Binary(
shape=[1], # Shape is [1] per agent
dtype=torch.bool,
device=self.device,
),
}
print(f"agents_observation_composite_dict: {agents_observation_composite_dict}")
agents_observation_composite = Composite(
agents_observation_composite_dict,
# The shape of this inner Composite is now [num_agents]
shape=torch.Size([self.num_agents]), # Shape is [num_agents]
device=self.device,
)
# Observation spec - Standard TorchRL Multi-Agent Structure
# Top-level Composite: batch_size=[num_envs], shape=[]
# Inner Composite ('agents'): shape=[num_agents]
# Innermost Specs ('observation', 'position_key', 'done', 'is_terminated', 'is_truncated'): shape=[feature_dims] (per agent)
self.observation_spec = Composite(
{
"agents": agents_observation_composite
},
# The shape of the top-level Composite is empty, batch_size is handled separately
batch_size=self.batch_size,
device=self.device,
)
print(f"observation_spec: {self.observation_spec}")
self.num_individual_actions=13 # Corrected to 13 as per the user's request to match observation shape
# Action spec - Standard TorchRL Multi-Agent Structure
# Top-level Composite: batch_size=[num_envs], shape=[]
# Inner Composite ('agents'): shape=[num_agents]
# Innermost Spec ('action'): shape=[action_dims] (per agent)
if self.categorical_actions:
# For discrete actions, action feature shape is [num_individuals_per_agent]
action_feature_shape = torch.Size([self.num_individuals_per_agent]) # Shape [13] per agent
# nvec is per agent, set device [13]
nvec_per_agent = torch.full([self.num_individuals_per_agent], self.num_individual_actions, dtype=torch.int64, device=self.device)
# Define the inner Composite for agents with shape [num_agents]
agents_action_composite_dict = {
"action": MultiCategorical(
nvec=nvec_per_agent, # nvec is per agent group now [13]
shape=action_feature_shape, # Shape is just feature dims per agent [13]
dtype=torch.int64,
device=self.device, # Explicitly set device
)
}
print(f"agents_action_composite_dict (categorical): {agents_action_composite_dict}")
agents_action_composite = Composite(
agents_action_composite_dict,
# The shape of this inner Composite is now [num_agents]
shape=torch.Size([self.num_agents]),
device=self.device,
)
self.action_spec = Composite(
{
"agents": agents_action_composite
},
# The shape of the top-level Composite is empty, batch_size is handled separately
batch_size=self.batch_size,
device=self.device,
)
print(f"action_spec (categorical): {self.action_spec}")
else:
# For continuous actions, define the spec as a single UnboundedContinuousTensorSpec
# with shape [num_agents, num_individuals_per_agent]
continuous_action_shape = torch.Size([self.num_agents, self.num_individuals_per_agent])
action_spec = UnboundedContinuousTensorSpec(
shape=continuous_action_shape, # Combined shape for all agents' continuous actions
dtype=torch.float32,
device=self.device,
)
print(f"action_spec (continuous): {action_spec}")
# The action spec is just this single spec at the top level
self.action_spec = action_spec
print(f"action_spec (continuous, top-level): {self.action_spec}")
# Reward spec - Standard TorchRL Multi-Agent Structure
# Top-level Composite: batch_size=[num_envs], shape=[]
# Inner Composite ('agents'): shape=[num_agents]
# Innermost Spec ('reward'): shape=[1] (per agent)
reward_feature_shape = torch.Size([1])
agents_reward_composite_dict = {
"reward": Unbounded( # Corrected shape to [1] per agent
shape=[1], # Corrected shape definition
dtype=torch.float32,
device=self.device,
)
}
print(f"agents_reward_composite_dict: {agents_reward_composite_dict}")
agents_reward_composite = Composite(
agents_reward_composite_dict,
shape=torch.Size([self.num_agents]), # Shape is [num_agents]
device=self.device,
)
print(f"agents_reward_composite: {agents_reward_composite}")
self.full_reward_spec = Composite(
{
"agents": agents_reward_composite
},
# The shape of the top-level Composite is empty, batch_size is handled separately
batch_size=self.batch_size,
device=self.device,
)
print(f"full_reward_spec: {self.full_reward_spec}")
# Define the inner Composite for info spec
info_composite_dict = { # Added spec for step_count
"step_count": Unbounded(
shape=[1], # Corrected shape definition [1] per environment
dtype=torch.int64,
device=self.device,
)
}
print(f"info_composite_dict: {info_composite_dict}")
info_composite = Composite( # Info is typically per environment, shape is empty []
info_composite_dict,
# The shape of this inner Composite is empty, it's per environment
shape=torch.Size([]), # Shape is empty
device=self.device, # Device of the inner Composite
)
print(f"info_composite: {info_composite}")
# Info spec (typically per environment) - Standard Composite for batched env info
# Top-level Composite: batch_size=[num_envs], shape=[]
# Inner Composite ('info'): shape=[] - Info is per environment batch, not per agent group
self.info_spec = Composite(
{
"info": info_composite
},
batch_size=self.batch_size,
device=self.device,
)
print(f"info_spec: {self.info_spec}")
# Define unbatched info spec for CustomEnvClass
info_composite_unbatched_dict = { # Added spec for step_count
"step_count": Unbounded(
shape=[1], # Corrected shape definition [1] per environment
dtype=torch.int64,
device=self.device,
)
}
print(f"info_composite_unbatched_dict: {info_composite_unbatched_dict}")
info_composite_unbatched = Composite( # Info is typically per environment, shape is empty []
info_composite_unbatched_dict,
# The shape of this inner Composite is empty, it's per environment
shape=torch.Size([]), # Shape is empty
device=self.device, # Device of the inner Composite
)
print(f"info_composite_unbatched: {info_composite_unbatched}")
self.info_spec_unbatched = Composite(
{
"info": info_composite_unbatched
},
batch_size=torch.Size([]), # Unbatched spec has empty batch size
device=self.device,
)
print(f"info_spec_unbatched: {self.info_spec_unbatched}")
Metadata
Metadata
Assignees
Labels
No labels